From f84dbb912f344270f31d5cce974f12908a47798d Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Thu, 10 Jul 2008 14:48:54 -0700
Subject: genirq: enable polling for disabled screaming irqs

When we disable a screaming irq we never see it again.  If the irq
line is shared or if the driver half works this is a real pain.  So
periodically poll the handlers for screaming interrupts.

I use a timer instead of the classic irq poll technique of working off
the timer interrupt because when we use the local apic timers
note_interrupt is never called (bug?).  Further on a system with
dynamic ticks the timer interrupt might not even fire unless there is
a timer telling it it needs to.

I forced this case on my test system with an e1000 nic and my ssh
session remained responsive despite the interrupt handler only being
called every 10th of a second.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/spurious.c | 146 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 91 insertions(+), 55 deletions(-)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index c66d3f10e853..19fe9d6ebfe8 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -12,83 +12,117 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/timer.h>
 
 static int irqfixup __read_mostly;
 
+#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
+static void poll_spurious_irqs(unsigned long dummy);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+
 /*
  * Recovery handler for misrouted interrupts.
  */
-static int misrouted_irq(int irq)
+static int try_one_irq(int irq, struct irq_desc *desc)
 {
-	int i;
+	struct irqaction *action;
 	int ok = 0;
 	int work = 0;	/* Did we do work for a real IRQ */
 
-	for (i = 1; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		struct irqaction *action;
-
-		if (i == irq)	/* Already tried */
-			continue;
-
-		spin_lock(&desc->lock);
-		/* Already running on another processor */
-		if (desc->status & IRQ_INPROGRESS) {
-			/*
-			 * Already running: If it is shared get the other
-			 * CPU to go looking for our mystery interrupt too
-			 */
-			if (desc->action && (desc->action->flags & IRQF_SHARED))
-				desc->status |= IRQ_PENDING;
-			spin_unlock(&desc->lock);
-			continue;
-		}
-		/* Honour the normal IRQ locking */
-		desc->status |= IRQ_INPROGRESS;
-		action = desc->action;
+	spin_lock(&desc->lock);
+	/* Already running on another processor */
+	if (desc->status & IRQ_INPROGRESS) {
+		/*
+		 * Already running: If it is shared get the other
+		 * CPU to go looking for our mystery interrupt too
+		 */
+		if (desc->action && (desc->action->flags & IRQF_SHARED))
+			desc->status |= IRQ_PENDING;
 		spin_unlock(&desc->lock);
+		return ok;
+	}
+	/* Honour the normal IRQ locking */
+	desc->status |= IRQ_INPROGRESS;
+	action = desc->action;
+	spin_unlock(&desc->lock);
 
-		while (action) {
-			/* Only shared IRQ handlers are safe to call */
-			if (action->flags & IRQF_SHARED) {
-				if (action->handler(i, action->dev_id) ==
-						IRQ_HANDLED)
-					ok = 1;
-			}
-			action = action->next;
+	while (action) {
+		/* Only shared IRQ handlers are safe to call */
+		if (action->flags & IRQF_SHARED) {
+			if (action->handler(irq, action->dev_id) ==
+				IRQ_HANDLED)
+				ok = 1;
 		}
-		local_irq_disable();
-		/* Now clean up the flags */
-		spin_lock(&desc->lock);
-		action = desc->action;
+		action = action->next;
+	}
+	local_irq_disable();
+	/* Now clean up the flags */
+	spin_lock(&desc->lock);
+	action = desc->action;
 
+	/*
+	 * While we were looking for a fixup someone queued a real
+	 * IRQ clashing with our walk:
+	 */
+	while ((desc->status & IRQ_PENDING) && action) {
 		/*
-		 * While we were looking for a fixup someone queued a real
-		 * IRQ clashing with our walk:
-		 */
-		while ((desc->status & IRQ_PENDING) && action) {
-			/*
-			 * Perform real IRQ processing for the IRQ we deferred
-			 */
-			work = 1;
-			spin_unlock(&desc->lock);
-			handle_IRQ_event(i, action);
-			spin_lock(&desc->lock);
-			desc->status &= ~IRQ_PENDING;
-		}
-		desc->status &= ~IRQ_INPROGRESS;
-		/*
-		 * If we did actual work for the real IRQ line we must let the
-		 * IRQ controller clean up too
+		 * Perform real IRQ processing for the IRQ we deferred
 		 */
-		if (work && desc->chip && desc->chip->end)
-			desc->chip->end(i);
+		work = 1;
 		spin_unlock(&desc->lock);
+		handle_IRQ_event(irq, action);
+		spin_lock(&desc->lock);
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+	/*
+	 * If we did actual work for the real IRQ line we must let the
+	 * IRQ controller clean up too
+	 */
+	if (work && desc->chip && desc->chip->end)
+		desc->chip->end(irq);
+	spin_unlock(&desc->lock);
+
+	return ok;
+}
+
+static int misrouted_irq(int irq)
+{
+	int i;
+	int ok = 0;
+
+	for (i = 1; i < NR_IRQS; i++) {
+		struct irq_desc *desc = irq_desc + i;
+
+		if (i == irq)	/* Already tried */
+			continue;
+
+		if (try_one_irq(i, desc))
+			ok = 1;
 	}
 	/* So the caller can adjust the irq error counts */
 	return ok;
 }
 
+static void poll_spurious_irqs(unsigned long dummy)
+{
+	int i;
+	for (i = 1; i < NR_IRQS; i++) {
+		struct irq_desc *desc = irq_desc + i;
+		unsigned int status;
+
+		/* Racy but it doesn't matter */
+		status = desc->status;
+		barrier();
+		if (!(status & IRQ_SPURIOUS_DISABLED))
+			continue;
+
+		try_one_irq(i, desc);
+	}
+
+	mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -212,6 +246,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
 		desc->depth++;
 		desc->chip->disable(irq);
+
+		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 	}
 	desc->irqs_unhandled = 0;
 }
-- 
cgit v1.2.3-55-g7522


From 8d00a6c8f6b08e7167bc03bf955cdc7e47c5132e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 22 Jul 2008 08:39:57 +0200
Subject: genirq: remove last NO_IDLE_HZ leftovers

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ----
 kernel/irq/handle.c | 2 --
 2 files changed, 6 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..f3047df2d23c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -197,10 +197,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
-#ifndef handle_dynamic_tick
-# define handle_dynamic_tick(a)		do { } while (0)
-#endif
-
 #ifdef CONFIG_SMP
 
 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 5fa6198e9139..f4c8a03a9fbb 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -131,8 +131,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	handle_dynamic_tick(action);
-
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
-- 
cgit v1.2.3-55-g7522


From ee974e01e5ef2914036f08c8e41d1a3fa8bfc9d9 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 20 Aug 2008 16:37:26 -0700
Subject: clocksource: check range

Check that the value being passed to parse_pmtmr() does not exceed the
limits of pmtmr_ioport.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/clocksource/acpi_pm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 5ca1d80de182..3df338481004 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -226,9 +226,12 @@ static int __init parse_pmtmr(char *arg)
 
 	if (strict_strtoul(arg, 16, &base))
 		return -EINVAL;
-
+#ifdef CONFIG_X86_64
+	if (base > UINT_MAX)
+		return -ERANGE;
+#endif
 	printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
-	       (unsigned int)pmtmr_ioport, base);
+	       pmtmr_ioport, base);
 	pmtmr_ioport = base;
 
 	return 1;
-- 
cgit v1.2.3-55-g7522


From 1aa5dfb751d275ae7117d3b73ac423b4a46f2a73 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 20 Aug 2008 16:37:28 -0700
Subject: clocksource: keep track of original clocksource frequency

The clocksource frequency is represented by
clocksource->mult/2^(clocksource->shift).  Currently, when NTP makes
adjustments to the clock frequency, they are made directly to the mult
value.

This has the drawback that once changed, we cannot know what the orignal
mult value was, or how much adjustment has been applied.

This property causes problems in calculating proper ntp intervals when
switching back and forth between clocksources.

This patch separates the current mult value into a mult and mult_orig
pair.  The mult_orig value stays constant, while the ntp clocksource
adjustments are done only to the mult value.

This allows for correct ntp interval calculation and additionally lays the
groundwork for a new notion of time, what I'm calling the monotonic-raw
time, which is introduced in a following patch.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clocksource.h | 11 +++++++----
 kernel/time/clocksource.c   |  3 +++
 kernel/time/jiffies.c       |  1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 55e434feec99..f0a7fb984413 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -45,7 +45,8 @@ struct clocksource;
  * @read:		returns a cycle value
  * @mask:		bitmask for two's complement
  *			subtraction of non 64 bit counters
- * @mult:		cycle to nanosecond multiplier
+ * @mult:		cycle to nanosecond multiplier (adjusted by NTP)
+ * @mult_orig:		cycle to nanosecond multiplier (unadjusted by NTP)
  * @shift:		cycle to nanosecond divisor (power of two)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
@@ -63,6 +64,7 @@ struct clocksource {
 	cycle_t (*read)(void);
 	cycle_t mask;
 	u32 mult;
+	u32 mult_orig;
 	u32 shift;
 	unsigned long flags;
 	cycle_t (*vread)(void);
@@ -201,16 +203,17 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 {
 	u64 tmp;
 
-	/* XXX - All of this could use a whole lot of optimization */
+	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = length_nsec;
 	tmp <<= c->shift;
-	tmp += c->mult/2;
-	do_div(tmp, c->mult);
+	tmp += c->mult_orig/2;
+	do_div(tmp, c->mult_orig);
 
 	c->cycle_interval = (cycle_t)tmp;
 	if (c->cycle_interval == 0)
 		c->cycle_interval = 1;
 
+	/* Go back from cycles -> shifted ns, this time use ntp adjused mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
 }
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 093d4acf993b..9ed2eec97526 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c)
 	unsigned long flags;
 	int ret;
 
+	/* save mult_orig on registration */
+	c->mult_orig = c->mult;
+
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4c256fdb8875..1ca99557e929 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = {
 	.read		= jiffies_read,
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+	.mult_orig	= NSEC_PER_JIFFY << JIFFIES_SHIFT,
 	.shift		= JIFFIES_SHIFT,
 };
 
-- 
cgit v1.2.3-55-g7522


From 9a055117d3d9cb562f83f8d4cd88772761f4cab0 Mon Sep 17 00:00:00 2001
From: Roman Zippel
Date: Wed, 20 Aug 2008 16:37:28 -0700
Subject: clocksource: introduce clocksource_forward_now()

To keep the raw monotonic patch simple first introduce
clocksource_forward_now(), which takes care of the offset since the last
update_wall_time() call and adds it to the clock, so there is no need
anymore to deal with it explicitly at various places, which need to make
significant changes to the clock.

This is also gets rid of the timekeeping_suspend_nsecs, instead of
waiting until resume, the value is accumulated during suspend. In the end
there is only a single user of __get_nsec_offset() left, so I integrated
it back to getnstimeofday().

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/timekeeping.c | 71 ++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c9..83d3555a6998 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -58,27 +58,23 @@ struct clocksource *clock;
 
 #ifdef CONFIG_GENERIC_TIME
 /**
- * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ * clocksource_forward_now - update clock to the current time
  *
- * private function, must hold xtime_lock lock when being
- * called. Returns the number of nanoseconds since the
- * last call to update_wall_time() (adjusted by NTP scaling)
+ * Forward the current clock to update its state since the last call to
+ * update_wall_time(). This is useful before significant clock changes,
+ * as it avoids having to deal with this time offset explicitly.
  */
-static inline s64 __get_nsec_offset(void)
+static void clocksource_forward_now(void)
 {
 	cycle_t cycle_now, cycle_delta;
-	s64 ns_offset;
+	s64 nsec;
 
-	/* read clocksource: */
 	cycle_now = clocksource_read(clock);
-
-	/* calculate the delta since the last update_wall_time: */
 	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+	clock->cycle_last = cycle_now;
 
-	/* convert to nanoseconds: */
-	ns_offset = cyc2ns(clock, cycle_delta);
-
-	return ns_offset;
+	nsec = cyc2ns(clock, cycle_delta);
+	timespec_add_ns(&xtime, nsec);
 }
 
 /**
@@ -89,6 +85,7 @@ static inline s64 __get_nsec_offset(void)
  */
 void getnstimeofday(struct timespec *ts)
 {
+	cycle_t cycle_now, cycle_delta;
 	unsigned long seq;
 	s64 nsecs;
 
@@ -96,7 +93,15 @@ void getnstimeofday(struct timespec *ts)
 		seq = read_seqbegin(&xtime_lock);
 
 		*ts = xtime;
-		nsecs = __get_nsec_offset();
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = cyc2ns(clock, cycle_delta);
 
 	} while (read_seqretry(&xtime_lock, seq));
 
@@ -129,22 +134,22 @@ EXPORT_SYMBOL(do_gettimeofday);
  */
 int do_settimeofday(struct timespec *tv)
 {
+	struct timespec ts_delta;
 	unsigned long flags;
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
-	nsec -= __get_nsec_offset();
+	clocksource_forward_now();
+
+	ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
+	ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
+	wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
 
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+	xtime = *tv;
 
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 	update_xtime_cache(0);
 
 	clock->error = 0;
@@ -170,22 +175,17 @@ EXPORT_SYMBOL(do_settimeofday);
 static void change_clocksource(void)
 {
 	struct clocksource *new;
-	cycle_t now;
-	u64 nsec;
 
 	new = clocksource_get_next();
 
 	if (clock == new)
 		return;
 
-	new->cycle_last = 0;
-	now = clocksource_read(new);
-	nsec =  __get_nsec_offset();
-	timespec_add_ns(&xtime, nsec);
+	clocksource_forward_now();
 
 	clock = new;
-	clock->cycle_last = now;
-
+	clock->cycle_last = 0;
+	clock->cycle_last = clocksource_read(new);
 	clock->error = 0;
 	clock->xtime_nsec = 0;
 	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -200,8 +200,8 @@ static void change_clocksource(void)
 	 */
 }
 #else
+static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
-static inline s64 __get_nsec_offset(void) { return 0; }
 #endif
 
 /**
@@ -265,8 +265,6 @@ void __init timekeeping_init(void)
 static int timekeeping_suspended;
 /* time in seconds when suspend began */
 static unsigned long timekeeping_suspend_time;
-/* xtime offset when we went into suspend */
-static s64 timekeeping_suspend_nsecs;
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -292,8 +290,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic.tv_sec -= sleep_length;
 		total_sleep_time += sleep_length;
 	}
-	/* Make sure that we have the correct xtime reference */
-	timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
 	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	clock->cycle_last = 0;
@@ -319,8 +315,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 	timekeeping_suspend_time = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	/* Get the current xtime offset */
-	timekeeping_suspend_nsecs = __get_nsec_offset();
+	clocksource_forward_now();
 	timekeeping_suspended = 1;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -461,10 +456,10 @@ void update_wall_time(void)
 	 */
 	while (offset >= clock->cycle_interval) {
 		/* accumulate one interval */
-		clock->xtime_nsec += clock->xtime_interval;
-		clock->cycle_last += clock->cycle_interval;
 		offset -= clock->cycle_interval;
+		clock->cycle_last += clock->cycle_interval;
 
+		clock->xtime_nsec += clock->xtime_interval;
 		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
 			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
 			xtime.tv_sec++;
-- 
cgit v1.2.3-55-g7522


From 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 20 Aug 2008 16:37:30 -0700
Subject: clocksource: introduce CLOCK_MONOTONIC_RAW

In talking with Josip Loncaric, and his work on clock synchronization (see
btime.sf.net), he mentioned that for really close synchronization, it is
useful to have access to "hardware time", that is a notion of time that is
not in any way adjusted by the clock slewing done to keep close time sync.

Part of the issue is if we are using the kernel's ntp adjusted
representation of time in order to measure how we should correct time, we
can run into what Paul McKenney aptly described as "Painting a road using
the lines we're painting as the guide".

I had been thinking of a similar problem, and was trying to come up with a
way to give users access to a purely hardware based time representation
that avoided users having to know the underlying frequency and mask values
needed to deal with the wide variety of possible underlying hardware
counters.

My solution is to introduce CLOCK_MONOTONIC_RAW.  This exposes a
nanosecond based time value, that increments starting at bootup and has no
frequency adjustments made to it what so ever.

The time is accessed from userspace via the posix_clock_gettime() syscall,
passing CLOCK_MONOTONIC_RAW as the clock_id.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clocksource.h |  3 +++
 include/linux/time.h        |  2 ++
 kernel/posix-timers.c       | 15 +++++++++++++++
 kernel/time/timekeeping.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f0a7fb984413..f88d32f8ff7c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -79,6 +79,7 @@ struct clocksource {
 	/* timekeeping specific data, ignore */
 	cycle_t cycle_interval;
 	u64	xtime_interval;
+	u32	raw_interval;
 	/*
 	 * Second part is written at each timer interrupt
 	 * Keep it in a different cache line to dirty no
@@ -87,6 +88,7 @@ struct clocksource {
 	cycle_t cycle_last ____cacheline_aligned_in_smp;
 	u64 xtime_nsec;
 	s64 error;
+	struct timespec raw_time;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
@@ -215,6 +217,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 
 	/* Go back from cycles -> shifted ns, this time use ntp adjused mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
+	c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
 }
 
 
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..205f974b9ebf 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -117,6 +117,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
+extern void getrawmonotonic(struct timespec *ts);
 extern void getboottime(struct timespec *ts);
 extern void monotonic_to_bootbased(struct timespec *ts);
 
@@ -214,6 +215,7 @@ struct itimerval {
 #define CLOCK_MONOTONIC			1
 #define CLOCK_PROCESS_CPUTIME_ID	2
 #define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
 
 /*
  * The IDs of various hardware clocks:
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbff..d3c66b53dff6 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -222,6 +222,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+/*
+ * Get monotonic time for posix timers
+ */
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+{
+	getrawmonotonic(tp);
+	return 0;
+}
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void)
 		.clock_get = posix_ktime_get_ts,
 		.clock_set = do_posix_clock_nosettime,
 	};
+	struct k_clock clock_monotonic_raw = {
+		.clock_getres = hrtimer_get_res,
+		.clock_get = posix_get_monotonic_raw,
+		.clock_set = do_posix_clock_nosettime,
+	};
 
 	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+	register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 83d3555a6998..5099c95b8aa2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -75,6 +75,9 @@ static void clocksource_forward_now(void)
 
 	nsec = cyc2ns(clock, cycle_delta);
 	timespec_add_ns(&xtime, nsec);
+
+	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+	clock->raw_time.tv_nsec += nsec;
 }
 
 /**
@@ -183,6 +186,8 @@ static void change_clocksource(void)
 
 	clocksource_forward_now();
 
+	new->raw_time = clock->raw_time;
+
 	clock = new;
 	clock->cycle_last = 0;
 	clock->cycle_last = clocksource_read(new);
@@ -204,6 +209,39 @@ static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
 #endif
 
+/**
+ * getrawmonotonic - Returns the raw monotonic time in a timespec
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the raw monotonic time (completely un-modified by ntp)
+ */
+void getrawmonotonic(struct timespec *ts)
+{
+	unsigned long seq;
+	s64 nsecs;
+	cycle_t cycle_now, cycle_delta;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		/* read clocksource: */
+		cycle_now = clocksource_read(clock);
+
+		/* calculate the delta since the last update_wall_time: */
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+		/* convert to nanoseconds: */
+		nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+
+		*ts = clock->raw_time;
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	timespec_add_ns(ts, nsecs);
+}
+EXPORT_SYMBOL(getrawmonotonic);
+
+
 /**
  * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  */
@@ -466,6 +504,12 @@ void update_wall_time(void)
 			second_overflow();
 		}
 
+		clock->raw_time.tv_nsec += clock->raw_interval;
+		if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
+			clock->raw_time.tv_nsec -= NSEC_PER_SEC;
+			clock->raw_time.tv_sec++;
+		}
+
 		/* accumulate error between NTP and clock interval */
 		clock->error += tick_length;
 		clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
-- 
cgit v1.2.3-55-g7522


From d82f0b0f6f1a0a25afc288fb7135b1601fe6df18 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 20 Aug 2008 16:46:04 -0700
Subject: migrate_timers: add comment, use spinlock_irq()

Add the comment to explain why the double lock in migrate_timers()
can't deadlock.

Change the code to use spinlock_irq() instead of local_irq_disable()
+ spin_lock().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/hrtimer.c | 11 ++++++-----
 kernel/timer.c   | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b8e4dce80a74..03ea1378c43b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1620,9 +1620,11 @@ static void migrate_hrtimers(int cpu)
 	new_base = &get_cpu_var(hrtimer_bases);
 
 	tick_cancel_sched_timer(cpu);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1631,8 +1633,7 @@ static void migrate_hrtimers(int cpu)
 	}
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/timer.c b/kernel/timer.c
index 03bc7f1f1593..e8019cc3418d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1435,9 +1435,11 @@ static void __cpuinit migrate_timers(int cpu)
 	BUG_ON(cpu_online(cpu));
 	old_base = per_cpu(tvec_bases, cpu);
 	new_base = get_cpu_var(tvec_bases);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	BUG_ON(old_base->running_timer);
@@ -1452,8 +1454,7 @@ static void __cpuinit migrate_timers(int cpu)
 	}
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(tvec_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-- 
cgit v1.2.3-55-g7522


From 916c7a855174e3b53d182b97a26b2e27a29726a1 Mon Sep 17 00:00:00 2001
From: Roman Zippel
Date: Wed, 20 Aug 2008 16:46:08 -0700
Subject: ntp: fix ADJ_OFFSET_SS_READ bug and do_adjtimex() cleanup

Thanks to the review by Michael Kerrisk a bug in the recent
ADJ_OFFSET_SS_READ option was discovered, where the ntp time_offset was
inadvertently set by it.  This fixes this by making the adjtime code
more separate from the ntp_adjtime code (both of which really want to
be separate syscalls).

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h |  9 +++++-
 kernel/time/ntp.c     | 76 +++++++++++++++++++++++++++------------------------
 2 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index fc6035d29d56..c00bcdd3ae42 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -141,8 +141,15 @@ struct timex {
 #define ADJ_MICRO		0x1000	/* select microsecond resolution */
 #define ADJ_NANO		0x2000	/* select nanosecond resolution */
 #define ADJ_TICK		0x4000	/* tick value */
+
+#ifdef __KERNEL__
+#define ADJ_ADJTIME		0x8000	/* switch between adjtime/adjtimex modes */
+#define ADJ_OFFSET_SINGLESHOT	0x0001	/* old-fashioned adjtime */
+#define ADJ_OFFSET_READONLY	0x2000	/* read-only adjtime */
+#else
 #define ADJ_OFFSET_SINGLESHOT	0x8001	/* old-fashioned adjtime */
-#define ADJ_OFFSET_SS_READ	0xa001  /* read-only adjtime */
+#define ADJ_OFFSET_SS_READ	0xa001	/* read-only adjtime */
+#endif
 
 /* xntp 3.4 compatibility names */
 #define MOD_OFFSET	ADJ_OFFSET
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5125ddd8196b..c6921aa1a42a 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { }
 int do_adjtimex(struct timex *txc)
 {
 	struct timespec ts;
-	long save_adjust, sec;
 	int result;
 
-	/* In order to modify anything, you gotta be super-user! */
-	if (txc->modes && !capable(CAP_SYS_TIME))
-		return -EPERM;
-
-	/* Now we validate the data before disabling interrupts */
-
-	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
+	/* Validate the data before disabling interrupts */
+	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
-		if (txc->modes & ~ADJ_OFFSET_SS_READ)
+		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
 			return -EINVAL;
+		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+		    !capable(CAP_SYS_TIME))
+			return -EPERM;
+	} else {
+		/* In order to modify anything, you gotta be super-user! */
+		 if (txc->modes && !capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		/* if the quartz is off by more than 10% something is VERY wrong! */
+		if (txc->modes & ADJ_TICK &&
+		    (txc->tick <  900000/USER_HZ ||
+		     txc->tick > 1100000/USER_HZ))
+				return -EINVAL;
+
+		if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
+			hrtimer_cancel(&leap_timer);
 	}
 
-	/* if the quartz is off by more than 10% something is VERY wrong ! */
-	if (txc->modes & ADJ_TICK)
-		if (txc->tick <  900000/USER_HZ ||
-		    txc->tick > 1100000/USER_HZ)
-			return -EINVAL;
-
-	if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
-		hrtimer_cancel(&leap_timer);
 	getnstimeofday(&ts);
 
 	write_seqlock_irq(&xtime_lock);
 
-	/* Save for later - semantics of adjtime is to return old value */
-	save_adjust = time_adjust;
-
 	/* If there are input parameters, then process them */
+	if (txc->modes & ADJ_ADJTIME) {
+		long save_adjust = time_adjust;
+
+		if (!(txc->modes & ADJ_OFFSET_READONLY)) {
+			/* adjtime() is independent from ntp_adjtime() */
+			time_adjust = txc->offset;
+			ntp_update_frequency();
+		}
+		txc->offset = save_adjust;
+		goto adj_done;
+	}
 	if (txc->modes) {
+		long sec;
+
 		if (txc->modes & ADJ_STATUS) {
 			if ((time_status & STA_PLL) &&
 			    !(txc->status & STA_PLL)) {
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc)
 		if (txc->modes & ADJ_TAI && txc->constant > 0)
 			time_tai = txc->constant;
 
-		if (txc->modes & ADJ_OFFSET) {
-			if (txc->modes == ADJ_OFFSET_SINGLESHOT)
-				/* adjtime() is independent from ntp_adjtime() */
-				time_adjust = txc->offset;
-			else
-				ntp_update_offset(txc->offset);
-		}
+		if (txc->modes & ADJ_OFFSET)
+			ntp_update_offset(txc->offset);
 		if (txc->modes & ADJ_TICK)
 			tick_usec = txc->tick;
 
@@ -389,19 +396,16 @@ int do_adjtimex(struct timex *txc)
 			ntp_update_frequency();
 	}
 
+	txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
+				  NTP_SCALE_SHIFT);
+	if (!(time_status & STA_NANO))
+		txc->offset /= NSEC_PER_USEC;
+
+adj_done:
 	result = time_state;	/* mostly `TIME_OK' */
 	if (time_status & (STA_UNSYNC|STA_CLOCKERR))
 		result = TIME_ERROR;
 
-	if ((txc->modes == ADJ_OFFSET_SINGLESHOT) ||
-	    (txc->modes == ADJ_OFFSET_SS_READ))
-		txc->offset = save_adjust;
-	else {
-		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
-					  NTP_SCALE_SHIFT);
-		if (!(time_status & STA_NANO))
-			txc->offset /= NSEC_PER_USEC;
-	}
 	txc->freq	   = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
 					 (s64)PPM_SCALE_INV,
 					 NTP_SCALE_SHIFT);
-- 
cgit v1.2.3-55-g7522


From 377bf1e4ac2d894791733270604594c7c851ef83 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov
Date: Thu, 21 Aug 2008 22:58:28 +0400
Subject: genirq: fix irq_desc->depth handling with DEBUG_SHIRQ

When DEBUG_SHIRQ is selected, a spurious IRQ is issued before
the setup_irq() initializes the desc->depth. An IRQ handler may
call disable_irq_nosync(), but then setup_irq() will overwrite
desc->depth, and upon enable_irq() we'll catch this WARN:

------------[ cut here ]------------
Badness at kernel/irq/manage.c:180
NIP: c0061ab8 LR: c0061f10 CTR: 00000000
REGS: cf83be50 TRAP: 0700   Not tainted  (2.6.27-rc3-23450-g74919b0)
MSR: 00021032 <ME,IR,DR>  CR: 22042022  XER: 20000000
TASK = cf829100[5] 'events/0' THREAD: cf83a000
GPR00: c0061f10 cf83bf00 cf829100 c038e674 00000016 00000000 cf83bef8 00000038
GPR08: c0298910 00000000 c0310d28 cf83a000 00000c9c 1001a1a8 0fffe000 00800000
GPR16: ffffffff 00000000 007fff00 00000000 007ffeb0 c03320a0 c031095c c0310924
GPR24: cf8292ec cf807190 cf83a000 00009032 c038e6a4 c038e674 cf99b1cc c038e674
NIP [c0061ab8] __enable_irq+0x20/0x80
LR [c0061f10] enable_irq+0x50/0x70
Call Trace:
[cf83bf00] [c038e674] irq_desc+0x630/0x9000 (unreliable)
[cf83bf10] [c0061f10] enable_irq+0x50/0x70
[cf83bf30] [c01abe94] phy_change+0x68/0x108
[cf83bf50] [c0046394] run_workqueue+0xc4/0x16c
[cf83bf90] [c0046834] worker_thread+0x74/0xd4
[cf83bfd0] [c004ab7c] kthread+0x48/0x84
[cf83bff0] [c00135e0] kernel_thread+0x44/0x60
Instruction dump:
4e800020 3d20c031 38a94214 4bffffcc 9421fff0 7c0802a6 93e1000c 7c7f1b78
90010014 8123001c 2f890000 409e001c <0fe00000> 80010014 83e1000c 38210010

That trace corresponds to this line:
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);

The patch fixes the problem by moving the SHIRQ code below the
setup_irq().

Unfortunately we can't easily move the SHIRQ code inside the
setup_irq(), since it grabs a spinlock, so to prvent a 'real'
IRQ from interfere us we should disable that IRQ.

p.s. The driver in question is drivers/net/phy/phy.c.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/manage.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be36010..ae1b684e048c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -596,26 +596,29 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
+	retval = setup_irq(irq, action);
+	if (retval)
+		kfree(action);
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	if (irqflags & IRQF_SHARED) {
 		/*
 		 * It's a shared IRQ -- the driver ought to be prepared for it
 		 * to happen immediately, so let's make sure....
-		 * We do this before actually registering it, to make sure that
-		 * a 'real' IRQ doesn't run in parallel with our fake
+		 * We disable the irq to make sure that a 'real' IRQ doesn't
+		 * run in parallel with our fake.
 		 */
 		unsigned long flags;
 
+		disable_irq(irq);
 		local_irq_save(flags);
+
 		handler(irq, dev_id);
+
 		local_irq_restore(flags);
+		enable_irq(irq);
 	}
 #endif
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
-- 
cgit v1.2.3-55-g7522


From 7e6e178ab1548c8d894a77593e757acf4510b8ba Mon Sep 17 00:00:00 2001
From: Pawel MOLL
Date: Mon, 1 Sep 2008 10:12:11 +0100
Subject: genirq: irq_chip->startup() usage in setup_irq and set_irq_chained
 handler

This patch clarifies usage of irq_chip->startup() callback:

1. The "if (startup) startup(); else enabled();" code in setup_irq()
   is unnecessary, as startup() falls back to enabled() via
   default callbacks, set by irq_chip_set_defaults().

2. When using set_irq_chained_handler() the startup() was never called,
   which is not good at all... Fixed. And again - when startup() is not
   defined the call will fall back to enable() than to unmask() via
   default callbacks.

Signed-off-by: Pawel Moll <pawel.moll@st.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c   | 2 +-
 kernel/irq/manage.c | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 964964baefa2..240c64d59267 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -587,7 +587,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		desc->status &= ~IRQ_DISABLED;
 		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
 		desc->depth = 0;
-		desc->chip->unmask(irq);
+		desc->chip->startup(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ae1b684e048c..9aa3e7b81389 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -397,10 +397,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 		if (!(desc->status & IRQ_NOAUTOEN)) {
 			desc->depth = 0;
 			desc->status &= ~IRQ_DISABLED;
-			if (desc->chip->startup)
-				desc->chip->startup(irq);
-			else
-				desc->chip->enable(irq);
+			desc->chip->startup(irq);
 		} else
 			/* Undo nested disables: */
 			desc->depth = 1;
-- 
cgit v1.2.3-55-g7522


From fdb0ac80618729e6b12121c66449b8532990eaf3 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Sat, 13 Sep 2008 19:57:04 -0700
Subject: async_tx: make async_tx_run_dependencies() easier to read

* Rename 'next' to 'dep'
* Move the channel switch check inside the loop to simplify
  termination

Acked-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_tx.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index e8362c1efa30..dcbf1be149f3 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -115,34 +115,32 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
  *	(start) dependent operations on their target channel
  * @tx: transaction with dependencies
  */
-void
-async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
+void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
 {
-	struct dma_async_tx_descriptor *next = tx->next;
+	struct dma_async_tx_descriptor *dep = tx->next;
+	struct dma_async_tx_descriptor *dep_next;
 	struct dma_chan *chan;
 
-	if (!next)
+	if (!dep)
 		return;
 
-	tx->next = NULL;
-	chan = next->chan;
+	chan = dep->chan;
 
 	/* keep submitting up until a channel switch is detected
 	 * in that case we will be called again as a result of
 	 * processing the interrupt from async_tx_channel_switch
 	 */
-	while (next && next->chan == chan) {
-		struct dma_async_tx_descriptor *_next;
-
-		spin_lock_bh(&next->lock);
-		next->parent = NULL;
-		_next = next->next;
-		if (_next && _next->chan == chan)
-			next->next = NULL;
-		spin_unlock_bh(&next->lock);
-
-		next->tx_submit(next);
-		next = _next;
+	for (; dep; dep = dep_next) {
+		spin_lock_bh(&dep->lock);
+		dep->parent = NULL;
+		dep_next = dep->next;
+		if (dep_next && dep_next->chan == chan)
+			dep->next = NULL; /* ->next will be submitted */
+		else
+			dep_next = NULL; /* submit current dep and terminate */
+		spin_unlock_bh(&dep->lock);
+
+		dep->tx_submit(dep);
 	}
 
 	chan->device->device_issue_pending(chan);
-- 
cgit v1.2.3-55-g7522


From 89f72a0633d1d4f28c4c5c8831ec814523d7671a Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Sat, 13 Sep 2008 20:05:34 -0700
Subject: drivers/dma/ioat_dma.c: drop code after return

The break after the return serves no purpose.

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ioat_dma.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index bc8c6e3470ca..1ef68b315657 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -971,11 +971,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
 	switch (ioat_chan->device->version) {
 	case IOAT_VER_1_2:
 		return ioat1_dma_get_next_descriptor(ioat_chan);
-		break;
 	case IOAT_VER_2_0:
 	case IOAT_VER_3_0:
 		return ioat2_dma_get_next_descriptor(ioat_chan);
-		break;
 	}
 	return NULL;
 }
-- 
cgit v1.2.3-55-g7522


From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001
From: Frank Mayhar
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: timers: fix itimer/many thread hang

Overview

This patch reworks the handling of POSIX CPU timers, including the
ITIMER_PROF, ITIMER_VIRT timers and rlimit handling.  It was put together
with the help of Roland McGrath, the owner and original writer of this code.

The problem we ran into, and the reason for this rework, has to do with using
a profiling timer in a process with a large number of threads.  It appears
that the performance of the old implementation of run_posix_cpu_timers() was
at least O(n*3) (where "n" is the number of threads in a process) or worse.
Everything is fine with an increasing number of threads until the time taken
for that routine to run becomes the same as or greater than the tick time, at
which point things degrade rather quickly.

This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF."

Code Changes

This rework corrects the implementation of run_posix_cpu_timers() to make it
run in constant time for a particular machine.  (Performance may vary between
one machine and another depending upon whether the kernel is built as single-
or multiprocessor and, in the latter case, depending upon the number of
running processors.)  To do this, at each tick we now update fields in
signal_struct as well as task_struct.  The run_posix_cpu_timers() function
uses those fields to make its decisions.

We define a new structure, "task_cputime," to contain user, system and
scheduler times and use these in appropriate places:

struct task_cputime {
	cputime_t utime;
	cputime_t stime;
	unsigned long long sum_exec_runtime;
};

This is included in the structure "thread_group_cputime," which is a new
substructure of signal_struct and which varies for uniprocessor versus
multiprocessor kernels.  For uniprocessor kernels, it uses "task_cputime" as
a simple substructure, while for multiprocessor kernels it is a pointer:

struct thread_group_cputime {
	struct task_cputime totals;
};

struct thread_group_cputime {
	struct task_cputime *totals;
};

We also add a new task_cputime substructure directly to signal_struct, to
cache the earliest expiration of process-wide timers, and task_cputime also
replaces the it_*_expires fields of task_struct (used for earliest expiration
of thread timers).  The "thread_group_cputime" structure contains process-wide
timers that are updated via account_user_time() and friends.  In the non-SMP
case the structure is a simple aggregator; unfortunately in the SMP case that
simplicity was not achievable due to cache-line contention between CPUs (in
one measured case performance was actually _worse_ on a 16-cpu system than
the same test on a 4-cpu system, due to this contention).  For SMP, the
thread_group_cputime counters are maintained as a per-cpu structure allocated
using alloc_percpu().  The timer functions update only the timer field in
the structure corresponding to the running CPU, obtained using per_cpu_ptr().

We define a set of inline functions in sched.h that we use to maintain the
thread_group_cputime structure and hide the differences between UP and SMP
implementations from the rest of the kernel.  The thread_group_cputime_init()
function initializes the thread_group_cputime structure for the given task.
The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the
out-of-line function thread_group_cputime_alloc_smp() to allocate and fill
in the per-cpu structures and fields.  The thread_group_cputime_free()
function, also a no-op for UP, in SMP frees the per-cpu structures.  The
thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls
thread_group_cputime_alloc() if the per-cpu structures haven't yet been
allocated.  The thread_group_cputime() function fills the task_cputime
structure it is passed with the contents of the thread_group_cputime fields;
in UP it's that simple but in SMP it must also safely check that tsk->signal
is non-NULL (if it is it just uses the appropriate fields of task_struct) and,
if so, sums the per-cpu values for each online CPU.  Finally, the three
functions account_group_user_time(), account_group_system_time() and
account_group_exec_runtime() are used by timer functions to update the
respective fields of the thread_group_cputime structure.

Non-SMP operation is trivial and will not be mentioned further.

The per-cpu structure is always allocated when a task creates its first new
thread, via a call to thread_group_cputime_clone_thread() from copy_signal().
It is freed at process exit via a call to thread_group_cputime_free() from
cleanup_signal().

All functions that formerly summed utime/stime/sum_sched_runtime values from
from all threads in the thread group now use thread_group_cputime() to
snapshot the values in the thread_group_cputime structure or the values in
the task structure itself if the per-cpu structure hasn't been allocated.

Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit.
The run_posix_cpu_timers() function has been split into a fast path and a
slow path; the former safely checks whether there are any expired thread
timers and, if not, just returns, while the slow path does the heavy lifting.
With the dedicated thread group fields, timers are no longer "rebalanced" and
the process_timer_rebalance() function and related code has gone away.  All
summing loops are gone and all code that used them now uses the
thread_group_cputime() inline.  When process-wide timers are set, the new
task_cputime structure in signal_struct is used to cache the earliest
expiration; this is checked in the fast path.

Performance

The fix appears not to add significant overhead to existing operations.  It
generally performs the same as the current code except in two cases, one in
which it performs slightly worse (Case 5 below) and one in which it performs
very significantly better (Case 2 below).  Overall it's a wash except in those
two cases.

I've since done somewhat more involved testing on a dual-core Opteron system.

Case 1: With no itimer running, for a test with 100,000 threads, the fixed
	kernel took 1428.5 seconds, 513 seconds more than the unfixed system,
	all of which was spent in the system.  There were twice as many
	voluntary context switches with the fix as without it.

Case 2: With an itimer running at .01 second ticks and 4000 threads (the most
	an unmodified kernel can handle), the fixed kernel ran the test in
	eight percent of the time (5.8 seconds as opposed to 70 seconds) and
	had better tick accuracy (.012 seconds per tick as opposed to .023
	seconds per tick).

Case 3: A 4000-thread test with an initial timer tick of .01 second and an
	interval of 10,000 seconds (i.e. a timer that ticks only once) had
	very nearly the same performance in both cases:  6.3 seconds elapsed
	for the fixed kernel versus 5.5 seconds for the unfixed kernel.

With fewer threads (eight in these tests), the Case 1 test ran in essentially
the same time on both the modified and unmodified kernels (5.2 seconds versus
5.8 seconds).  The Case 2 test ran in about the same time as well, 5.9 seconds
versus 5.4 seconds but again with much better tick accuracy, .013 seconds per
tick versus .025 seconds per tick for the unmodified kernel.

Since the fix affected the rlimit code, I also tested soft and hard CPU limits.

Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer
	running), the modified kernel was very slightly favored in that while
	it killed the process in 19.997 seconds of CPU time (5.002 seconds of
	wall time), only .003 seconds of that was system time, the rest was
	user time.  The unmodified kernel killed the process in 20.001 seconds
	of CPU (5.014 seconds of wall time) of which .016 seconds was system
	time.  Really, though, the results were too close to call.  The results
	were essentially the same with no itimer running.

Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds
	(where the hard limit would never be reached) and an itimer running,
	the modified kernel exhibited worse tick accuracy than the unmodified
	kernel: .050 seconds/tick versus .028 seconds/tick.  Otherwise,
	performance was almost indistinguishable.  With no itimer running this
	test exhibited virtually identical behavior and times in both cases.

In times past I did some limited performance testing.  those results are below.

On a four-cpu Opteron system without this fix, a sixteen-thread test executed
in 3569.991 seconds, of which user was 3568.435s and system was 1.556s.  On
the same system with the fix, user and elapsed time were about the same, but
system time dropped to 0.007 seconds.  Performance with eight, four and one
thread were comparable.  Interestingly, the timer ticks with the fix seemed
more accurate:  The sixteen-thread test with the fix received 149543 ticks
for 0.024 seconds per tick, while the same test without the fix received 58720
for 0.061 seconds per tick.  Both cases were configured for an interval of
0.01 seconds.  Again, the other tests were comparable.  Each thread in this
test computed the primes up to 25,000,000.

I also did a test with a large number of threads, 100,000 threads, which is
impossible without the fix.  In this case each thread computed the primes only
up to 10,000 (to make the runtime manageable).  System time dominated, at
1546.968 seconds out of a total 2176.906 seconds (giving a user time of
629.938s).  It received 147651 ticks for 0.015 seconds per tick, still quite
accurate.  There is obviously no comparable test without the fix.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/binfmt_elf.c              |  19 +-
 fs/proc/array.c              |   8 +-
 include/linux/posix-timers.h |   2 +
 include/linux/sched.h        | 257 +++++++++++++++++++++--
 include/linux/time.h         |   3 +
 kernel/compat.c              |  53 ++---
 kernel/exit.c                |  19 +-
 kernel/fork.c                |  88 ++++----
 kernel/itimer.c              |  33 +--
 kernel/posix-cpu-timers.c    | 471 +++++++++++++++++++++++--------------------
 kernel/sched.c               |  53 ++++-
 kernel/sched_fair.c          |   1 +
 kernel/sched_rt.c            |   4 +-
 kernel/signal.c              |   8 +-
 kernel/sys.c                 |  75 +++----
 security/selinux/hooks.c     |   9 +-
 16 files changed, 677 insertions(+), 426 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 655ed8d30a86..a8635f637038 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 71c9be59c9c2..933953c4e407 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
+			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime = cputime_add(utime, task_utime(t));
-				stime = cputime_add(stime, task_stime(t));
 				gtime = cputime_add(gtime, task_gtime(t));
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime = cputime_add(utime, sig->utime);
-			stime = cputime_add(stime, sig->stime);
+			thread_group_cputime(task, &cputime);
+			utime = cputime.utime;
+			stime = cputime.stime;
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ade..f9d8e9e94e9b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
+void update_rlimit_cpu(unsigned long rlim_new);
+
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad15..26d7a5f2d0ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -425,6 +425,45 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:		time spent in user mode, in &cputime_t units
+ * @stime:		time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
+ * 
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp	stime
+#define virt_exp	utime
+#define sched_exp	sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:		thread group interval timers; substructure for
+ *			uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+#ifdef CONFIG_SMP
+struct thread_group_cputime {
+	struct task_cputime *totals;
+};
+#else
+struct thread_group_cputime {
+	struct task_cputime totals;
+};
+#endif
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -470,6 +509,17 @@ struct signal_struct {
 	cputime_t it_prof_expires, it_virt_expires;
 	cputime_t it_prof_incr, it_virt_incr;
 
+	/*
+	 * Thread group totals for process CPU clocks.
+	 * See thread_group_cputime(), et al, for details.
+	 */
+	struct thread_group_cputime cputime;
+
+	/* Earliest-expiration cache. */
+	struct task_cputime cputime_expires;
+
+	struct list_head cpu_timers[3];
+
 	/* job control IDs */
 
 	/*
@@ -500,7 +550,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t utime, stime, cutime, cstime;
+	cputime_t cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -508,14 +558,6 @@ struct signal_struct {
 	unsigned long inblock, oublock, cinblock, coublock;
 	struct task_io_accounting ioac;
 
-	/*
-	 * Cumulative ns of scheduled CPU time for dead threads in the
-	 * group, not including a zombie group leader.  (This only differs
-	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
-	 * other than jiffies.)
-	 */
-	unsigned long long sum_sched_runtime;
-
 	/*
 	 * We don't bother to synchronize most readers of this at all,
 	 * because there is no reader checking a limit that actually needs
@@ -527,8 +569,6 @@ struct signal_struct {
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
 
-	struct list_head cpu_timers[3];
-
 	/* keep the process-shared keyrings here so that they do the right
 	 * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
@@ -1134,8 +1174,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-  	cputime_t it_prof_expires, it_virt_expires;
-	unsigned long long it_sched_expires;
+	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu);
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+/*
+ * Thread group CPU time accounting.
+ */
+#ifdef CONFIG_SMP
+
+extern int thread_group_cputime_alloc_smp(struct task_struct *);
+extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						    struct task_struct *new)
+{
+	if (curr->signal->cputime.totals)
+		return 0;
+	return thread_group_cputime_alloc_smp(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+	free_percpu(sig->cputime.totals);
+}
+
+/**
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
+ *
+ * This is a wrapper for the real routine, thread_group_cputime_smp().  See
+ * that routine for details.
+ */
+static inline void thread_group_cputime(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	thread_group_cputime_smp(tsk, times);
+}
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *						thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals.utime = cputime_zero;
+	sig->cputime.totals.stime = cputime_zero;
+	sig->cputime.totals.sum_exec_runtime = 0;
+}
+
+static inline int thread_group_cputime_alloc(struct task_struct *tsk)
+{
+	return 0;
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						     struct task_struct *tsk)
+{
+}
+
+static inline void thread_group_cputime(struct task_struct *tsk,
+					 struct task_cputime *cputime)
+{
+	*cputime = tsk->signal->cputime.totals;
+}
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline void account_group_user_time(struct task_struct *tsk,
+					    cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					      cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					       unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
+
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..1b70b3c293e9 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
+struct tms;
+extern void do_sys_times(struct tms *);
+
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
diff --git a/kernel/compat.c b/kernel/compat.c
index 32c254a8ab9a..72650e39b3e6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which,
 	return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
+		struct tms tms;
 		struct compat_tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time.  Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
-
-		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..40036ac04271 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
+		thread_group_cputime(p, &cputime);
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(cputime.utime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(cputime.stime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe84796..a8ac2efb8e30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+	/* Thread group counters. */
+	thread_group_cputime_init(sig);
+
+	/* Expiration times and increments. */
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	/* Cached expiration times. */
+	sig->cputime_expires.prof_exp = cputime_zero;
+	sig->cputime_expires.virt_exp = cputime_zero;
+	sig->cputime_expires.sched_exp = 0;
+
+	/* The timer lists. */
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
-		return 0;
+		ret = thread_group_cputime_clone_thread(current, tsk);
+		if (likely(!ret)) {
+			atomic_inc(&current->signal->count);
+			atomic_inc(&current->signal->live);
+		}
+		return ret;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
@@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
-
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = NULL;
 
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
@@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		/*
-		 * New sole thread in the process gets an expiry time
-		 * of the whole CPU time limit.
-		 */
-		tsk->it_prof_expires =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-	}
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+	thread_group_cputime_free(sig);
 	exit_thread_group_keys(sig);
 	kmem_cache_free(signal_cachep, sig);
 }
@@ -885,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+/*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.sched_exp = 0;
+	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	p->it_virt_expires = cputime_zero;
-	p->it_prof_expires = cputime_zero;
-	p->it_sched_expires = 0;
-	INIT_LIST_HEAD(&p->cpu_timers[0]);
-	INIT_LIST_HEAD(&p->cpu_timers[1]);
-	INIT_LIST_HEAD(&p->cpu_timers[2]);
+	posix_cpu_timers_init(p);
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD) {
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-		if (!cputime_eq(current->signal->it_virt_expires,
-				cputime_zero) ||
-		    !cputime_eq(current->signal->it_prof_expires,
-				cputime_zero) ||
-		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-		    !list_empty(&current->signal->cpu_timers[0]) ||
-		    !list_empty(&current->signal->cpu_timers[1]) ||
-		    !list_empty(&current->signal->cpu_timers[2])) {
-			/*
-			 * Have child wake up on its first tick to check
-			 * for process CPU timers.
-			 */
-			p->it_prof_expires = jiffies_to_cputime(1);
-		}
 	}
 
 	if (likely(p->pid)) {
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9bd..db7c358b9a02 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t utime = tsk->signal->utime;
-			do {
-				utime = cputime_add(utime, t->utime);
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime cputime;
+			cputime_t utime;
+
+			thread_group_cputime(tsk, &cputime);
+			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
 	case ITIMER_PROF:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t ptime = cputime_add(tsk->signal->utime,
-						      tsk->signal->stime);
-			do {
-				ptime = cputime_add(ptime,
-						    cputime_add(t->utime,
-								t->stime));
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime times;
+			cputime_t ptime;
+
+			thread_group_cputime(tsk, &times);
+			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
@@ -185,7 +176,6 @@ again:
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
 		tsk->signal->it_virt_expires = nval;
 		tsk->signal->it_virt_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
 	case ITIMER_PROF:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
 		tsk->signal->it_prof_expires = nval;
 		tsk->signal->it_prof_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36f..dba1c334c3e8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,99 @@
 #include <linux/math64.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_SMP
+/*
+ * Allocate the thread_group_cputime structure appropriately for SMP kernels
+ * and fill in the current values of the fields.  Called from copy_signal()
+ * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * thread to a thread group.  Assumes interrupts are enabled when called.
+ */
+int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct task_cputime *cputime;
+
+	/*
+	 * If we have multiple threads and we don't already have a
+	 * per-CPU task_cputime struct, allocate one and fill it in with
+	 * the times accumulated so far.
+	 */
+	if (sig->cputime.totals)
+		return 0;
+	cputime = alloc_percpu(struct task_cputime);
+	if (cputime == NULL)
+		return -ENOMEM;
+	read_lock(&tasklist_lock);
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (sig->cputime.totals) {
+		spin_unlock_irq(&tsk->sighand->siglock);
+		read_unlock(&tasklist_lock);
+		free_percpu(cputime);
+		return 0;
+	}
+	sig->cputime.totals = cputime;
+	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime->utime = tsk->utime;
+	cputime->stime = tsk->stime;
+	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
+	put_cpu_no_resched();
+	spin_unlock_irq(&tsk->sighand->siglock);
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+/**
+ * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ *
+ * @tsk:	The task we use to identify the thread group.
+ * @times:	task_cputime structure in which we return the summed fields.
+ *
+ * Walk the list of CPUs to sum the per-CPU time fields in the thread group
+ * time structure.
+ */
+void thread_group_cputime_smp(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	struct signal_struct *sig;
+	int i;
+	struct task_cputime *tot;
+
+	sig = tsk->signal;
+	if (unlikely(!sig) || !sig->cputime.totals) {
+		times->utime = tsk->utime;
+		times->stime = tsk->stime;
+		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
+		return;
+	}
+	times->stime = times->utime = cputime_zero;
+	times->sum_exec_runtime = 0;
+	for_each_possible_cpu(i) {
+		tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+		times->utime = cputime_add(times->utime, tot->utime);
+		times->stime = cputime_add(times->stime, tot->stime);
+		times->sum_exec_runtime += tot->sum_exec_runtime;
+	}
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ */
+void update_rlimit_cpu(unsigned long rlim_new)
+{
+	cputime_t cputime;
+
+	cputime = secs_to_cputime(rlim_new);
+	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+            cputime_lt(current->signal->it_prof_expires, cputime)) {
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
 static int check_clock(const clockid_t which_clock)
 {
 	int error = 0;
@@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
 {
 	return p->utime;
 }
-static inline unsigned long long sched_ns(struct task_struct *p)
-{
-	return task_sched_runtime(p);
-}
 
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 {
@@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = sched_ns(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 					 struct task_struct *p,
 					 union cpu_time_count *cpu)
 {
-	struct task_struct *t = p;
- 	switch (clock_idx) {
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+	switch (clock_idx) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = p->signal->utime;
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->signal->sum_sched_runtime;
-		/* Add in each other live thread.  */
-		while ((t = next_thread(t)) != p) {
-			cpu->sched += t->se.sum_exec_runtime;
-		}
-		cpu->sched += sched_ns(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, tsk->signal->utime),
-		       cputime_add(tsk->stime, tsk->signal->stime),
-		     tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
-}
-
-
-/*
- * Set the expiry times of all the threads in the process so one of them
- * will go off before the process cumulative expiry total is reached.
- */
-static void process_timer_rebalance(struct task_struct *p,
-				    unsigned int clock_idx,
-				    union cpu_time_count expires,
-				    union cpu_time_count val)
-{
-	cputime_t ticks, left;
-	unsigned long long ns, nsleft;
- 	struct task_struct *t = p;
-	unsigned int nthreads = atomic_read(&p->signal->live);
-
-	if (!nthreads)
-		return;
+	struct task_cputime cputime;
 
-	switch (clock_idx) {
-	default:
-		BUG();
-		break;
-	case CPUCLOCK_PROF:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(prof_ticks(t), left);
-				if (cputime_eq(t->it_prof_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_prof_expires, ticks)) {
-					t->it_prof_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_VIRT:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(virt_ticks(t), left);
-				if (cputime_eq(t->it_virt_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_virt_expires, ticks)) {
-					t->it_virt_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_SCHED:
-		nsleft = expires.sched - val.sched;
-		do_div(nsleft, nthreads);
-		nsleft = max_t(unsigned long long, nsleft, 1);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ns = t->se.sum_exec_runtime + nsleft;
-				if (t->it_sched_expires == 0 ||
-				    t->it_sched_expires > ns) {
-					t->it_sched_expires = ns;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	}
+	thread_group_cputime(tsk, &cputime);
+	cleanup_timers(tsk->signal->cpu_timers,
+		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
 
 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_PROF:
-				if (cputime_eq(p->it_prof_expires,
+				if (cputime_eq(p->cputime_expires.prof_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_prof_expires,
+				    cputime_gt(p->cputime_expires.prof_exp,
 					       nt->expires.cpu))
-					p->it_prof_expires = nt->expires.cpu;
+					p->cputime_expires.prof_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_VIRT:
-				if (cputime_eq(p->it_virt_expires,
+				if (cputime_eq(p->cputime_expires.virt_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_virt_expires,
+				    cputime_gt(p->cputime_expires.virt_exp,
 					       nt->expires.cpu))
-					p->it_virt_expires = nt->expires.cpu;
+					p->cputime_expires.virt_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_SCHED:
-				if (p->it_sched_expires == 0 ||
-				    p->it_sched_expires > nt->expires.sched)
-					p->it_sched_expires = nt->expires.sched;
+				if (p->cputime_expires.sched_exp == 0 ||
+				    p->cputime_expires.sched_exp >
+							nt->expires.sched)
+					p->cputime_expires.sched_exp =
+						nt->expires.sched;
 				break;
 			}
 		} else {
 			/*
-			 * For a process timer, we must balance
-			 * all the live threads' expirations.
+			 * For a process timer, set the cached expiration time.
 			 */
 			switch (CPUCLOCK_WHICH(timer->it_clock)) {
 			default:
@@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				    cputime_lt(p->signal->it_virt_expires,
 					       timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.virt_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_PROF:
 				if (!cputime_eq(p->signal->it_prof_expires,
 						cputime_zero) &&
@@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				if (i != RLIM_INFINITY &&
 				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.prof_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_SCHED:
-			rebalance:
-				process_timer_rebalance(
-					timer->it.cpu.task,
-					CPUCLOCK_WHICH(timer->it_clock),
-					timer->it.cpu.expires, now);
+				p->signal->cputime_expires.sched_exp =
+					timer->it.cpu.expires.sched;
 				break;
 			}
 		}
@@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	struct signal_struct *const sig = tsk->signal;
 
 	maxfire = 20;
-	tsk->it_prof_expires = cputime_zero;
+	tsk->cputime_expires.prof_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
-			tsk->it_prof_expires = t->expires.cpu;
+			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_virt_expires = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
-			tsk->it_virt_expires = t->expires.cpu;
+			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_sched_expires = 0;
+	tsk->cputime_expires.sched_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-			tsk->it_sched_expires = t->expires.sched;
+			tsk->cputime_expires.sched_exp = t->expires.sched;
 			break;
 		}
 		t->firing = 1;
@@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	int maxfire;
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime, ptime, virt_expires, prof_expires;
+	cputime_t utime, ptime, virt_expires, prof_expires;
 	unsigned long long sum_sched_runtime, sched_expires;
-	struct task_struct *t;
 	struct list_head *timers = sig->cpu_timers;
+	struct task_cputime cputime;
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk,
 	/*
 	 * Collect the current process totals.
 	 */
-	utime = sig->utime;
-	stime = sig->stime;
-	sum_sched_runtime = sig->sum_sched_runtime;
-	t = tsk;
-	do {
-		utime = cputime_add(utime, t->utime);
-		stime = cputime_add(stime, t->stime);
-		sum_sched_runtime += t->se.sum_exec_runtime;
-		t = next_thread(t);
-	} while (t != tsk);
-	ptime = cputime_add(utime, stime);
-
+	thread_group_cputime(tsk, &cputime);
+	utime = cputime.utime;
+	ptime = cputime_add(utime, cputime.stime);
+	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	prof_expires = cputime_zero;
 	while (!list_empty(timers)) {
@@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (!cputime_eq(prof_expires, cputime_zero) ||
-	    !cputime_eq(virt_expires, cputime_zero) ||
-	    sched_expires != 0) {
-		/*
-		 * Rebalance the threads' expiry times for the remaining
-		 * process CPU timers.
-		 */
-
-		cputime_t prof_left, virt_left, ticks;
-		unsigned long long sched_left, sched;
-		const unsigned int nthreads = atomic_read(&sig->live);
-
-		if (!nthreads)
-			return;
-
-		prof_left = cputime_sub(prof_expires, utime);
-		prof_left = cputime_sub(prof_left, stime);
-		prof_left = cputime_div_non_zero(prof_left, nthreads);
-		virt_left = cputime_sub(virt_expires, utime);
-		virt_left = cputime_div_non_zero(virt_left, nthreads);
-		if (sched_expires) {
-			sched_left = sched_expires - sum_sched_runtime;
-			do_div(sched_left, nthreads);
-			sched_left = max_t(unsigned long long, sched_left, 1);
-		} else {
-			sched_left = 0;
-		}
-		t = tsk;
-		do {
-			if (unlikely(t->flags & PF_EXITING))
-				continue;
-
-			ticks = cputime_add(cputime_add(t->utime, t->stime),
-					    prof_left);
-			if (!cputime_eq(prof_expires, cputime_zero) &&
-			    (cputime_eq(t->it_prof_expires, cputime_zero) ||
-			     cputime_gt(t->it_prof_expires, ticks))) {
-				t->it_prof_expires = ticks;
-			}
-
-			ticks = cputime_add(t->utime, virt_left);
-			if (!cputime_eq(virt_expires, cputime_zero) &&
-			    (cputime_eq(t->it_virt_expires, cputime_zero) ||
-			     cputime_gt(t->it_virt_expires, ticks))) {
-				t->it_virt_expires = ticks;
-			}
-
-			sched = t->se.sum_exec_runtime + sched_left;
-			if (sched_expires && (t->it_sched_expires == 0 ||
-					      t->it_sched_expires > sched)) {
-				t->it_sched_expires = sched;
-			}
-		} while ((t = next_thread(t)) != tsk);
-	}
+	if (!cputime_eq(prof_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
+		sig->cputime_expires.prof_exp = prof_expires;
+	if (!cputime_eq(virt_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
+		sig->cputime_expires.virt_exp = virt_expires;
+	if (sched_expires != 0 &&
+	    (sig->cputime_expires.sched_exp == 0 ||
+	     sig->cputime_expires.sched_exp > sched_expires))
+		sig->cputime_expires.sched_exp = sched_expires;
 }
 
 /*
@@ -1314,6 +1277,78 @@ out:
 	++timer->it_requeue_pending;
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:	The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (cputime_eq(cputime->utime, cputime_zero) &&
+	    cputime_eq(cputime->stime, cputime_zero) &&
+	    cputime->sum_exec_runtime == 0)
+		return 1;
+	return 0;
+}
+
+/**
+ * task_cputime_expired - Compare two task_cputime entities.
+ *
+ * @sample:	The task_cputime structure to be checked for expiration.
+ * @expires:	Expiration times, against which @sample will be checked.
+ *
+ * Checks @sample against @expires to see if any field of @sample has expired.
+ * Returns true if any field of the former is greater than the corresponding
+ * field of the latter if the latter field is set.  Otherwise returns false.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+					const struct task_cputime *expires)
+{
+	if (!cputime_eq(expires->utime, cputime_zero) &&
+	    cputime_ge(sample->utime, expires->utime))
+		return 1;
+	if (!cputime_eq(expires->stime, cputime_zero) &&
+	    cputime_ge(cputime_add(sample->utime, sample->stime),
+		       expires->stime))
+		return 1;
+	if (expires->sum_exec_runtime != 0 &&
+	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
+		return 1;
+	return 0;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:	The task (thread) being checked.
+ * @sig:	The signal pointer for that task.
+ *
+ * If there are no timers set return false.  Otherwise snapshot the task and
+ * thread group timers, then compare them with the corresponding expiration
+ # times.  Returns true if a timer has expired, else returns false.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk,
+					struct signal_struct *sig)
+{
+	struct task_cputime task_sample = {
+		.utime = tsk->utime,
+		.stime = tsk->stime,
+		.sum_exec_runtime = tsk->se.sum_exec_runtime
+	};
+	struct task_cputime group_sample;
+
+	if (task_cputime_zero(&tsk->cputime_expires) &&
+	    task_cputime_zero(&sig->cputime_expires))
+		return 0;
+	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+		return 1;
+	thread_group_cputime(tsk, &group_sample);
+	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
+	struct signal_struct *sig;
+	struct sighand_struct *sighand;
+	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-#define UNEXPIRED(clock) \
-		(cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
-		 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
-
-	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
-	    (tsk->it_sched_expires == 0 ||
-	     tsk->se.sum_exec_runtime < tsk->it_sched_expires))
-		return;
-
-#undef	UNEXPIRED
-
+	/* Pick up tsk->signal and make sure it's valid. */
+	sig = tsk->signal;
 	/*
-	 * Double-check with locks held.
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers.  If that's so, just return.  Also check that
+	 * tsk->signal is non-NULL; this probably can't happen but cover the
+	 * possibility anyway.
 	 */
-	read_lock(&tasklist_lock);
-	if (likely(tsk->signal != NULL)) {
-		spin_lock(&tsk->sighand->siglock);
-
+	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
+		return;
+	}
+	sighand = lock_task_sighand(tsk, &flags);
+	if (likely(sighand)) {
 		/*
-		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-		 * all the timers that are firing, and put them on the firing list.
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
 		 */
 		check_thread_timers(tsk, &firing);
 		check_process_timers(tsk, &firing);
@@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 		 * that gets the timer lock before we do will give it up and
 		 * spin until we've taken care of that timer below.
 		 */
-		spin_unlock(&tsk->sighand->siglock);
 	}
-	read_unlock(&tasklist_lock);
+	unlock_task_sighand(tsk, &flags);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 /*
  * Set one of the process-wide special case CPU timers.
- * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
- * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
- * absolute; non-null for ITIMER_*, where *newval is relative and we update
- * it to be absolute, *oldval is absolute and we update it to be relative.
+ * The tsk->sighand->siglock must be held by the caller.
+ * The *newval argument is relative and we update it to be absolute, *oldval
+ * is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
@@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	    cputime_ge(list_first_entry(head,
 				  struct cpu_timer_list, entry)->expires.cpu,
 		       *newval)) {
-		/*
-		 * Rejigger each thread's expiry time so that one will
-		 * notice before we hit the process-cumulative expiry time.
-		 */
-		union cpu_time_count expires = { .sched = 0 };
-		expires.cpu = *newval;
-		process_timer_rebalance(tsk, clock_idx, expires, now);
+		switch (clock_idx) {
+		case CPUCLOCK_PROF:
+			tsk->signal->cputime_expires.prof_exp = *newval;
+			break;
+		case CPUCLOCK_VIRT:
+			tsk->signal->cputime_expires.virt_exp = *newval;
+			break;
+		}
 	}
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index cc1f81b50b82..c51b5d276665 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4036,6 +4036,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
+ */
+static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	if (task_current(rq, p)) {
+		u64 delta_exec;
+
+		update_rq_clock(rq);
+		delta_exec = rq->clock - p->se.exec_start;
+		if ((s64)delta_exec > 0)
+			return delta_exec;
+	}
+	return 0;
+}
+
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
  * that have not yet been banked in case the task is currently running.
@@ -4043,17 +4062,31 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 unsigned long long task_sched_runtime(struct task_struct *p)
 {
 	unsigned long flags;
-	u64 ns, delta_exec;
+	u64 ns;
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime;
-	if (task_current(rq, p)) {
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns += delta_exec;
-	}
+	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group plus any more ns on the
+ * sched_clock that have not yet been banked in case the task is currently
+ * running.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	u64 ns;
+	struct rq *rq;
+	struct task_cputime totals;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
@@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 	cputime64_t tmp;
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
 	tmp = cputime_to_cputime64(cputime);
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	p->stime = cputime_add(p->stime, cputime);
+	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
+		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4bb..99aa31acc544 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		struct task_struct *curtask = task_of(curr);
 
 		cpuacct_charge(curtask, delta_exec);
+		account_group_exec_runtime(curtask, delta_exec);
 	}
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 552310798dad..8375e69af36a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
@@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..6eea5826d618 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	struct siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
+	struct task_cputime cputime;
 	int ret = sig;
 
 	BUG_ON(sig == -1);
@@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_uid = tsk->uid;
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-						       tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-						       tsk->signal->stime));
+	thread_group_cputime(tsk, &cputime);
+	info.si_utime = cputime_to_jiffies(cputime.utime);
+	info.si_stime = cputime_to_jiffies(cputime.stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 038a7bc0901d..d046a7a055c2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+void do_sys_times(struct tms *tms)
+{
+	struct task_cputime cputime;
+	cputime_t cutime, cstime;
+
+	spin_lock_irq(&current->sighand->siglock);
+	thread_group_cputime(current, &cputime);
+	cutime = current->signal->cutime;
+	cstime = current->signal->cstime;
+	spin_unlock_irq(&current->sighand->siglock);
+	tms->tms_utime = cputime_to_clock_t(cputime.utime);
+	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_cutime = cputime_to_clock_t(cutime);
+	tms->tms_cstime = cputime_to_clock_t(cstime);
+}
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
 		struct tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		spin_lock_irq(&tsk->sighand->siglock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-
-		tmp.tms_utime = cputime_to_clock_t(utime);
-		tmp.tms_stime = cputime_to_clock_t(stime);
-		tmp.tms_cutime = cputime_to_clock_t(cutime);
-		tmp.tms_cstime = cputime_to_clock_t(cstime);
+
+		do_sys_times(&tmp);
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
@@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
 	struct rlimit new_rlim, *old_rlim;
-	unsigned long it_prof_secs;
 	int retval;
 
 	if (resource >= RLIM_NLIMITS)
@@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	if (new_rlim.rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
-	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
-		unsigned long rlim_cur = new_rlim.rlim_cur;
-		cputime_t cputime;
-
-		cputime = secs_to_cputime(rlim_cur);
-		read_lock(&tasklist_lock);
-		spin_lock_irq(&current->sighand->siglock);
-		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-		spin_unlock_irq(&current->sighand->siglock);
-		read_unlock(&tasklist_lock);
-	}
+	update_rlimit_cpu(new_rlim.rlim_cur);
 out:
 	return 0;
 }
@@ -1540,11 +1518,8 @@ out:
  *
  */
 
-static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r,
-				     cputime_t *utimep, cputime_t *stimep)
+static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 {
-	*utimep = cputime_add(*utimep, t->utime);
-	*stimep = cputime_add(*stimep, t->stime);
 	r->ru_nvcsw += t->nvcsw;
 	r->ru_nivcsw += t->nivcsw;
 	r->ru_minflt += t->min_flt;
@@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
+	struct task_cputime cputime;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		accumulate_thread_rusage(p, r, &utime, &stime);
+		accumulate_thread_rusage(p, r);
 		goto out;
 	}
 
@@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			utime = cputime_add(utime, p->signal->utime);
-			stime = cputime_add(stime, p->signal->stime);
+			thread_group_cputime(p, &cputime);
+			utime = cputime_add(utime, cputime.utime);
+			stime = cputime_add(stime, cputime.stime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
@@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_oublock += p->signal->oublock;
 			t = p;
 			do {
-				accumulate_thread_rusage(t, r, &utime, &stime);
+				accumulate_thread_rusage(t, r);
 				t = next_thread(t);
 			} while (t != p);
 			break;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 03fc6a81ae32..69649783c266 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
 #include <linux/string.h>
 #include <linux/selinux.h>
 #include <linux/mutex.h>
+#include <linux/posix-timers.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 			initrlim = init_task.signal->rlim+i;
 			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
 		}
-		if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-			/*
-			 * This will cause RLIMIT_CPU calculations
-			 * to be refigured.
-			 */
-			current->it_prof_expires = jiffies_to_cputime(1);
-		}
+		update_rlimit_cpu(rlim->rlim_cur);
 	}
 
 	/* Wake up the parent if it is waiting so that it can
-- 
cgit v1.2.3-55-g7522


From 430b5294bd72c085c730e1e4b86580f164d976bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 14 Sep 2008 16:33:01 +0200
Subject: timers: fix itimer/many thread hang, fix

fix:

 kernel/fork.c:843: error: ‘struct signal_struct’ has no member named ‘sum_sched_runtime’
 kernel/irq/handle.c:117: warning: ‘sparse_irq_lock’ defined but not used

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/fork.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index a8ac2efb8e30..1181b9aac48e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -834,7 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
 	INIT_LIST_HEAD(&sig->cpu_timers[2]);
-- 
cgit v1.2.3-55-g7522


From 0a8eaa4f9b58759595a1bfe13a1295fdc25ba026 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 14 Sep 2008 17:03:52 +0200
Subject: timers: fix itimer/many thread hang, fix #2

fix the UP build:

In file included from arch/x86/kernel/asm-offsets_32.c:9,
                 from arch/x86/kernel/asm-offsets.c:3:
include/linux/sched.h: In function ‘thread_group_cputime_clone_thread’:
include/linux/sched.h:2272: warning: no return statement in function returning non-void
include/linux/sched.h: In function ‘thread_group_cputime_account_user’:
include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’)
include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’)
include/linux/sched.h: In function ‘thread_group_cputime_account_system’:
include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’)
include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’)
include/linux/sched.h: In function ‘thread_group_cputime_account_exec_runtime’:
include/linux/sched.h:2298: error: invalid type argument of ‘->’ (have ‘struct task_cputime’)
distcc[14501] ERROR: compile arch/x86/kernel/asm-offsets.c on a/30 failed
make[1]: *** [arch/x86/kernel/asm-offsets.s] Error 1

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26d7a5f2d0ba..ed355f02d329 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2251,6 +2251,7 @@ static inline void thread_group_cputime_free(struct signal_struct *sig)
 static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
 						     struct task_struct *tsk)
 {
+	return 0;
 }
 
 static inline void thread_group_cputime(struct task_struct *tsk,
@@ -2263,21 +2264,21 @@ static inline void thread_group_cputime_account_user(
 	struct thread_group_cputime *tgtimes,
 	cputime_t cputime)
 {
-	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
 }
 
 static inline void thread_group_cputime_account_system(
 	struct thread_group_cputime *tgtimes,
 	cputime_t cputime)
 {
-	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
 }
 
 static inline void thread_group_cputime_account_exec_runtime(
 	struct thread_group_cputime *tgtimes,
 	unsigned long long ns)
 {
-	tgtimes->totals->sum_exec_runtime += ns;
+	tgtimes->totals.sum_exec_runtime += ns;
 }
 
 #endif /* CONFIG_SMP */
-- 
cgit v1.2.3-55-g7522


From 5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 14 Sep 2008 17:11:46 +0200
Subject: timers: fix itimer/many thread hang, cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h     | 2 +-
 kernel/posix-cpu-timers.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed355f02d329..7ce8d4e53565 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -430,7 +430,7 @@ struct pacct_struct {
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
- * 
+ *
  * This structure groups together three kinds of CPU time that are
  * tracked for threads and thread groups.  Most things considering
  * CPU time want to group these counts together and treat all three
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index dba1c334c3e8..9a7ea049fcdc 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -94,7 +94,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
 
 	cputime = secs_to_cputime(rlim_new);
 	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
-            cputime_lt(current->signal->it_prof_expires, cputime)) {
+	    cputime_lt(current->signal->it_prof_expires, cputime)) {
 		spin_lock_irq(&current->sighand->siglock);
 		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 		spin_unlock_irq(&current->sighand->siglock);
@@ -1372,9 +1372,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * tsk->signal is non-NULL; this probably can't happen but cover the
 	 * possibility anyway.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
+	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
 		return;
-	}
+
 	sighand = lock_task_sighand(tsk, &flags);
 	if (likely(sighand)) {
 		/*
-- 
cgit v1.2.3-55-g7522


From 6b3141962dc82cfe1c30afdf91d564b309859cbe Mon Sep 17 00:00:00 2001
From: Timur Tabi
Date: Fri, 19 Sep 2008 04:16:19 -0700
Subject: dmatest: properly handle duplicate DMA channels

Update the the dmatest driver so that it handles duplicate DMA channels
properly.

When a DMA client is notified of an available DMA channel, it must check if it
has already allocated resources for that channel.  If so, it should return
DMA_DUP.  This can happen, for example, if a DMA driver calls
dma_async_device_register() more than once.

Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmatest.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index a08d19704743..422500c6c163 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -325,6 +325,11 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
 	struct dmatest_thread	*thread;
 	unsigned int		i;
 
+	/* Have we already been told about this channel? */
+	list_for_each_entry(dtc, &dmatest_channels, node)
+		if (dtc->chan == chan)
+			return DMA_DUP;
+
 	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
 	if (!dtc) {
 		pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
-- 
cgit v1.2.3-55-g7522


From 6fdb8bd47111d3f94be221082b725ec2dec1d5c7 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Fri, 19 Sep 2008 04:16:23 -0700
Subject: drivers/dma/dmatest.c: switch a GFP_ATOMIC to GFP_KERNEL

It was needlessly using the unreliable GFP_ATOMIC.

Cc: Timur Tabi <timur@freescale.com>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmatest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 422500c6c163..d1e381e35a9e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -330,7 +330,7 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
 		if (dtc->chan == chan)
 			return DMA_DUP;
 
-	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
 	if (!dtc) {
 		pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
 		return DMA_NAK;
-- 
cgit v1.2.3-55-g7522


From d7cfb60c5cf904ecf1e0ae23ec178175b86f0d4a Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Fri, 19 Sep 2008 13:13:44 +0100
Subject: hrtimer: remove hrtimer_clock_base::get_softirq_time()

Peter Zijlstra noticed this 8 months ago and I just noticed
it again.

hrtimer_clock_base::get_softirq_time() is currently unused
in the entire tree. In fact, looking at the logs, it appears
as if it was never used. Remove it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hrtimer.h | 2 --
 kernel/hrtimer.c        | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6d93dce61cbb..1b079bd29c35 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -145,7 +145,6 @@ struct hrtimer_sleeper {
  * @first:		pointer to the timer node which expires first
  * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
- * @get_softirq_time:	function to retrieve the current time from the softirq
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
  * @reprogram:		function to reprogram the timer event
@@ -157,7 +156,6 @@ struct hrtimer_clock_base {
 	struct rb_node		*first;
 	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
-	ktime_t			(*get_softirq_time)(void);
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 03ea1378c43b..4d761d50c529 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1401,9 +1401,7 @@ void hrtimer_run_queues(void)
 		if (!base->first)
 			continue;
 
-		if (base->get_softirq_time)
-			base->softirq_time = base->get_softirq_time();
-		else if (gettime) {
+		if (gettime) {
 			hrtimer_get_softirq_time(cpu_base);
 			gettime = 0;
 		}
-- 
cgit v1.2.3-55-g7522


From b91c4996df56fcd201f85c392a1de7bc3f6641f5 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Fri, 19 Sep 2008 13:13:48 +0100
Subject: hrtimer: remove hrtimer_clock_base::reprogram()

hrtimer_clock_base::reprogram() also appears to never
have been used, so remove it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hrtimer.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1b079bd29c35..68b0196d8696 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -147,7 +147,6 @@ struct hrtimer_sleeper {
  * @get_time:		function to retrieve the current time of the clock
  * @softirq_time:	the time when running the hrtimer queue in the softirq
  * @offset:		offset of this clock to the monotonic base
- * @reprogram:		function to reprogram the timer event
  */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
@@ -159,9 +158,6 @@ struct hrtimer_clock_base {
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
-	int			(*reprogram)(struct hrtimer *t,
-					     struct hrtimer_clock_base *b,
-					     ktime_t n);
 #endif
 };
 
-- 
cgit v1.2.3-55-g7522


From bb34d92f643086d546b49cef680f6f305ed84414 Mon Sep 17 00:00:00 2001
From: Frank Mayhar
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: timers: fix itimer/many thread hang, v2

This is the second resubmission of the posix timer rework patch, posted
a few days ago.

This includes the changes from the previous resubmittion, which addressed
Oleg Nesterov's comments, removing the RCU stuff from the patch and
un-inlining the thread_group_cputime() function for SMP.

In addition, per Ingo Molnar it simplifies the UP code, consolidating much
of it with the SMP version and depending on lower-level SMP/UP handling to
take care of the differences.

It also cleans up some UP compile errors, moves the scheduler stats-related
macros into kernel/sched_stats.h, cleans up a merge error in
kernel/fork.c and has a few other minor fixes and cleanups as suggested
by Oleg and Ingo. Thanks for the review, guys.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h |   1 +
 include/linux/sched.h       | 183 ++------------------------------------------
 kernel/fork.c               |   5 +-
 kernel/posix-cpu-timers.c   | 153 ++++++++++++++++--------------------
 kernel/sched.c              |  47 ++----------
 kernel/sched_stats.h        | 136 ++++++++++++++++++++++++++++++++
 6 files changed, 214 insertions(+), 311 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..cac3750cd65e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq)
 	return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ce8d4e53565..b982fb48c8f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,15 +454,9 @@ struct task_cputime {
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU clock calculations.
  */
-#ifdef CONFIG_SMP
 struct thread_group_cputime {
 	struct task_cputime *totals;
 };
-#else
-struct thread_group_cputime {
-	struct task_cputime totals;
-};
-#endif
 
 /*
  * NOTE! "signal_struct" does not have it's own
@@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
-#ifdef CONFIG_SMP
 
-extern int thread_group_cputime_alloc_smp(struct task_struct *);
-extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
 	sig->cputime.totals = NULL;
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *new)
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 {
 	if (curr->signal->cputime.totals)
 		return 0;
-	return thread_group_cputime_alloc_smp(curr);
+	return thread_group_cputime_alloc(curr);
 }
 
-static inline void thread_group_cputime_free(struct signal_struct *sig)
-{
-	free_percpu(sig->cputime.totals);
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * This is a wrapper for the real routine, thread_group_cputime_smp().  See
- * that routine for details.
- */
-static inline void thread_group_cputime(
-	struct task_struct *tsk,
-	struct task_cputime *times)
-{
-	thread_group_cputime_smp(tsk, times);
-}
-
-/**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the utime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the utime field there.
- */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->utime = cputime_add(times->utime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the stime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the stime field there.
- */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->stime = cputime_add(times->stime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *						thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @ns:		Time value by which to increment the sum_exec_runtime field
- *		of that structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the sum_exec_runtime field there.
- */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->sum_exec_runtime += ns;
-		put_cpu_no_resched();
-	}
-}
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	sig->cputime.totals.utime = cputime_zero;
-	sig->cputime.totals.stime = cputime_zero;
-	sig->cputime.totals.sum_exec_runtime = 0;
-}
-
-static inline int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-	return 0;
-}
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-}
-
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						     struct task_struct *tsk)
-{
-	return 0;
-}
-
-static inline void thread_group_cputime(struct task_struct *tsk,
-					 struct task_cputime *cputime)
-{
-	*cputime = tsk->signal->cputime.totals;
-}
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals.sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-static inline void account_group_user_time(struct task_struct *tsk,
-					    cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					      cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					       unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+	free_percpu(sig->cputime.totals);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 1181b9aac48e..021ae012cc75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -791,7 +791,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current, tsk);
+		ret = thread_group_cputime_clone_thread(current);
 		if (likely(!ret)) {
 			atomic_inc(&current->signal->count);
 			atomic_inc(&current->signal->live);
@@ -834,9 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9a7ea049fcdc..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,50 +7,46 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
 
-#ifdef CONFIG_SMP
 /*
- * Allocate the thread_group_cputime structure appropriately for SMP kernels
- * and fill in the current values of the fields.  Called from copy_signal()
- * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields.  Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
  * thread to a thread group.  Assumes interrupts are enabled when called.
  */
-int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+int thread_group_cputime_alloc(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	struct task_cputime *cputime;
 
 	/*
 	 * If we have multiple threads and we don't already have a
-	 * per-CPU task_cputime struct, allocate one and fill it in with
-	 * the times accumulated so far.
+	 * per-CPU task_cputime struct (checked in the caller), allocate
+	 * one and fill it in with the times accumulated so far.  We may
+	 * race with another thread so recheck after we pick up the sighand
+	 * lock.
 	 */
-	if (sig->cputime.totals)
-		return 0;
 	cputime = alloc_percpu(struct task_cputime);
 	if (cputime == NULL)
 		return -ENOMEM;
-	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (sig->cputime.totals) {
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		free_percpu(cputime);
 		return 0;
 	}
 	sig->cputime.totals = cputime;
-	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
 	cputime->utime = tsk->utime;
 	cputime->stime = tsk->stime;
 	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-	put_cpu_no_resched();
 	spin_unlock_irq(&tsk->sighand->siglock);
-	read_unlock(&tasklist_lock);
 	return 0;
 }
 
 /**
- * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
  *
  * @tsk:	The task we use to identify the thread group.
  * @times:	task_cputime structure in which we return the summed fields.
@@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk)
  * Walk the list of CPUs to sum the per-CPU time fields in the thread group
  * time structure.
  */
-void thread_group_cputime_smp(
+void thread_group_cputime(
 	struct task_struct *tsk,
 	struct task_cputime *times)
 {
@@ -83,8 +79,6 @@ void thread_group_cputime_smp(
 	}
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
@@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
-					 struct task_struct *p,
-					 union cpu_time_count *cpu)
+static int cpu_clock_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
 {
 	struct task_cputime cputime;
 
 	thread_group_cputime(p, &cputime);
-	switch (clock_idx) {
+	switch (which_clock) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
@@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = thread_group_sched_runtime(p);
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-					    cpu);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * fastpath_timer_check - POSIX CPU timers fast path.
  *
  * @tsk:	The task (thread) being checked.
- * @sig:	The signal pointer for that task.
  *
- * If there are no timers set return false.  Otherwise snapshot the task and
- * thread group timers, then compare them with the corresponding expiration
- # times.  Returns true if a timer has expired, else returns false.
+ * Check the task and thread group timers.  If both are zero (there are no
+ * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times.  Return
+ * true if a timer has expired, else return false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk,
-					struct signal_struct *sig)
+static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct task_cputime task_sample = {
-		.utime = tsk->utime,
-		.stime = tsk->stime,
-		.sum_exec_runtime = tsk->se.sum_exec_runtime
-	};
-	struct task_cputime group_sample;
+	struct signal_struct *sig = tsk->signal;
 
-	if (task_cputime_zero(&tsk->cputime_expires) &&
-	    task_cputime_zero(&sig->cputime_expires))
+	if (unlikely(!sig))
 		return 0;
-	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-		return 1;
-	thread_group_cputime(tsk, &group_sample);
-	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+
+	if (!task_cputime_zero(&tsk->cputime_expires)) {
+		struct task_cputime task_sample = {
+			.utime = tsk->utime,
+			.stime = tsk->stime,
+			.sum_exec_runtime = tsk->se.sum_exec_runtime
+		};
+
+		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+			return 1;
+	}
+	if (!task_cputime_zero(&sig->cputime_expires)) {
+		struct task_cputime group_sample;
+
+		thread_group_cputime(tsk, &group_sample);
+		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+			return 1;
+	}
+	return 0;
 }
 
 /*
@@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
-	struct signal_struct *sig;
-	struct sighand_struct *sighand;
-	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-	/* Pick up tsk->signal and make sure it's valid. */
-	sig = tsk->signal;
 	/*
 	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.  Also check that
-	 * tsk->signal is non-NULL; this probably can't happen but cover the
-	 * possibility anyway.
+	 * group timers.  If that's so, just return.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
+	if (!fastpath_timer_check(tsk))
 		return;
 
-	sighand = lock_task_sighand(tsk, &flags);
-	if (likely(sighand)) {
-		/*
-		 * Here we take off tsk->signal->cpu_timers[N] and
-		 * tsk->cpu_timers[N] all the timers that are firing, and
-		 * put them on the firing list.
-		 */
-		check_thread_timers(tsk, &firing);
-		check_process_timers(tsk, &firing);
+	spin_lock(&tsk->sighand->siglock);
+	/*
+	 * Here we take off tsk->signal->cpu_timers[N] and
+	 * tsk->cpu_timers[N] all the timers that are firing, and
+	 * put them on the firing list.
+	 */
+	check_thread_timers(tsk, &firing);
+	check_process_timers(tsk, &firing);
 
-		/*
-		 * We must release these locks before taking any timer's lock.
-		 * There is a potential race with timer deletion here, as the
-		 * siglock now protects our private firing list.  We have set
-		 * the firing flag in each timer, so that a deletion attempt
-		 * that gets the timer lock before we do will give it up and
-		 * spin until we've taken care of that timer below.
-		 */
-	}
-	unlock_task_sighand(tsk, &flags);
+	/*
+	 * We must release these locks before taking any timer's lock.
+	 * There is a potential race with timer deletion here, as the
+	 * siglock now protects our private firing list.  We have set
+	 * the firing flag in each timer, so that a deletion attempt
+	 * that gets the timer lock before we do will give it up and
+	 * spin until we've taken care of that timer below.
+	 */
+	spin_unlock(&tsk->sighand->siglock);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+	cpu_clock_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/sched.c b/kernel/sched.c
index c51b5d276665..260c22cc530a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4039,55 +4039,22 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
  */
-static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
+	struct rq *rq;
+	unsigned long flags;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
 		update_rq_clock(rq);
 		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
-			return delta_exec;
+			ns = delta_exec;
 	}
-	return 0;
-}
-
-/*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
- */
-unsigned long long task_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-
-	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
-
-	return ns;
-}
-
-/*
- * Return sum_exec_runtime for the thread group plus any more ns on the
- * sched_clock that have not yet been banked in case the task is currently
- * running.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-	struct task_cputime totals;
-
-	rq = task_rq_lock(p, &flags);
-	thread_group_cputime(p, &totals);
-	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
 
 	return ns;
 }
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e2..d6903bd0c7a8 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next)		do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+#ifdef CONFIG_SMP
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *						thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * These are the generic time-accounting routines that use the above
+ * functions.  They are the functions actually called by the scheduler.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					    cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					      cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					       unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
-- 
cgit v1.2.3-55-g7522


From 59f647c25a4f27c1e5c84710e0608b36303089f9 Mon Sep 17 00:00:00 2001
From: Timur Tabi
Date: Tue, 23 Sep 2008 15:55:56 -0700
Subject: fsldma: remove internal self-test from Freescale Elo DMA driver

The Freescale Elo DMA driver runs an internal self-test before registering
the channels with the DMA engine.  This self-test has a fundemental flaw in
that it calls the DMA engine's callback functions directly before the
registration.  However, the registration initializes some variables that the
callback functions uses, namely the device struct.

The code works today because there are two device structs: the one created
by the DMA engine, and one created by the Open Firmware (OF) subsystem.  The
self-test currently uses the device struct created by OF.  However, in the
future, some of the device structs created by OF will be eliminated.
This means that the self-test will only have access to the device struct
created by the DMA engine.  But this device struct isn't initialized when
the self-test runs, and this causes a kernel panic.

Since there is already a DMA test module (dmatest), the internal self-test
code is not useful anyway.  It is extremely unlikely that the test will fail
in normal usage.  It may have been helpful during development, but not any more.

Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Scott Wood <scottwood@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/fsldma.c | 132 ---------------------------------------------------
 1 file changed, 132 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index c0059ca58340..e9b263897c03 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -786,132 +786,6 @@ static void dma_do_tasklet(unsigned long data)
 	fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static void fsl_dma_callback_test(void *param)
-{
-	struct fsl_dma_chan *fsl_chan = param;
-	if (fsl_chan)
-		dev_dbg(fsl_chan->dev, "selftest: callback is ok!\n");
-}
-
-static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
-{
-	struct dma_chan *chan;
-	int err = 0;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	u8 *src, *dest;
-	int i;
-	size_t test_size;
-	struct dma_async_tx_descriptor *tx1, *tx2, *tx3;
-
-	test_size = 4096;
-
-	src = kmalloc(test_size * 2, GFP_KERNEL);
-	if (!src) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc memory for test!\n");
-		return -ENOMEM;
-	}
-
-	dest = src + test_size;
-
-	for (i = 0; i < test_size; i++)
-		src[i] = (u8) i;
-
-	chan = &fsl_chan->common;
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-	/* TX 1 */
-	dma_src = dma_map_single(fsl_chan->dev, src, test_size / 2,
-				 DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest, test_size / 2,
-				  DMA_FROM_DEVICE);
-	tx1 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 2, 0);
-	async_tx_ack(tx1);
-
-	cookie = fsl_dma_tx_submit(tx1);
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Test free and re-alloc channel resources */
-	fsl_dma_free_chan_resources(chan);
-
-	if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-		dev_err(fsl_chan->dev,
-				"selftest: Cannot alloc resources for DMA\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* Continue to test
-	 * TX 2
-	 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size / 2,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size / 2,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx2 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx2);
-
-	/* TX 3 */
-	dma_src = dma_map_single(fsl_chan->dev, src + test_size * 3 / 4,
-					test_size / 4, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(fsl_chan->dev, dest + test_size * 3 / 4,
-					test_size / 4, DMA_FROM_DEVICE);
-	tx3 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-	async_tx_ack(tx3);
-
-	/* Interrupt tx test */
-	tx1 = fsl_dma_prep_interrupt(chan, 0);
-	async_tx_ack(tx1);
-	cookie = fsl_dma_tx_submit(tx1);
-
-	/* Test exchanging the prepared tx sort */
-	cookie = fsl_dma_tx_submit(tx3);
-	cookie = fsl_dma_tx_submit(tx2);
-
-	if (dma_has_cap(DMA_INTERRUPT, ((struct fsl_dma_device *)
-	    dev_get_drvdata(fsl_chan->dev->parent))->common.cap_mask)) {
-		tx3->callback = fsl_dma_callback_test;
-		tx3->callback_param = fsl_chan;
-	}
-	fsl_dma_memcpy_issue_pending(chan);
-	msleep(2);
-
-	if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		dev_err(fsl_chan->dev, "selftest: Time out!\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	err = memcmp(src, dest, test_size);
-	if (err) {
-		for (i = 0; (*(src + i) == *(dest + i)) && (i < test_size);
-				i++);
-		dev_err(fsl_chan->dev, "selftest: Test failed, data %d/%ld is "
-				"error! src 0x%x, dest 0x%x\n",
-				i, (long)test_size, *(src + i), *(dest + i));
-	}
-
-free_resources:
-	fsl_dma_free_chan_resources(chan);
-out:
-	kfree(src);
-	return err;
-}
-
 static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
@@ -1000,17 +874,11 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 		}
 	}
 
-	err = fsl_dma_self_test(new_fsl_chan);
-	if (err)
-		goto err_self_test;
-
 	dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
 				match->compatible, new_fsl_chan->irq);
 
 	return 0;
 
-err_self_test:
-	free_irq(new_fsl_chan->irq, new_fsl_chan);
 err_no_irq:
 	list_del(&new_fsl_chan->common.device_node);
 err_no_chan:
-- 
cgit v1.2.3-55-g7522


From 4aa7361179bed905fd0f35b236a5c65db683b9e0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:46 -0700
Subject: posix-timers: don't switch to ->group_leader if ->it_process dies

posix_timer_event() drops SIGEV_THREAD_ID and switches to ->group_leader
if send_sigqueue() fails.

This is not very useful and doesn't work reliably.  send_sigqueue() can
only fail if ->it_process is dead.  But it can die before it dequeues the
SI_TIMER signal, in that case the timer stops anyway.

Remove this code.  I guess it was needed a long ago to ensure that the
timer is not destroyed when when its creator thread dies.

Q: perhaps it makes sense to change sys_timer_settime() to return an error
if ->it_process is dead?

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbff..3dfd15aecc60 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -298,6 +298,7 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
+	int shared, ret;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -316,20 +317,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	timr->sigq->info.si_tid = timr->it_id;
 	timr->sigq->info.si_value = timr->it_sigev_value;
 
-	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-		struct task_struct *leader;
-		int ret = send_sigqueue(timr->sigq, timr->it_process, 0);
-
-		if (likely(ret >= 0))
-			return ret;
-
-		timr->it_sigev_notify = SIGEV_SIGNAL;
-		leader = timr->it_process->group_leader;
-		put_task_struct(timr->it_process);
-		timr->it_process = leader;
-	}
-
-	return send_sigqueue(timr->sigq, timr->it_process, 1);
+	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	/* If we failed to send the signal the timer stops. */
+	return ret > 0;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
-- 
cgit v1.2.3-55-g7522


From 918fc0372831dca73039e1577bfea0c2ce49bdb6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:46 -0700
Subject: posix-timers: always do get_task_struct(timer->it_process)

Change the code to get/put timer->it_process regardless of
SIGEV_THREAD_ID.  This streamlines the create/destroy paths and allows us
to simplify the usage of exit_itimers() in de_thread().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 3dfd15aecc60..bd9c931b3659 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -540,11 +540,10 @@ sys_timer_create(const clockid_t which_clock,
 			 */
 			spin_lock_irqsave(&process->sighand->siglock, flags);
 			if (!(process->flags & PF_EXITING)) {
+				get_task_struct(process);
 				new_timer->it_process = process;
 				list_add(&new_timer->list,
 					 &process->signal->posix_timers);
-				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-					get_task_struct(process);
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
 			} else {
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
@@ -561,6 +560,7 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_signo = SIGALRM;
 		new_timer->it_sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
+		get_task_struct(process);
 		spin_lock_irqsave(&process->sighand->siglock, flags);
 		new_timer->it_process = process;
 		list_add(&new_timer->list, &process->signal->posix_timers);
@@ -853,8 +853,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
@@ -881,8 +880,7 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-		put_task_struct(timer->it_process);
+	put_task_struct(timer->it_process);
 	timer->it_process = NULL;
 
 	unlock_timer(timer, flags);
-- 
cgit v1.2.3-55-g7522


From 2cd499e38ec241691e4bce50bddc8f57e4cc9bd0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:47 -0700
Subject: posix-timers: sys_timer_create: remove the buggy PF_EXITING check

sys_timer_create() return -EINVAL if the target thread has PF_EXITING.

This doesn't really make sense, the sub-thread can die right after unlock.
And in fact, this is just wrong.  Without SIGEV_THREAD_ID good_sigevent()
returns ->group_leader, and it is very possible that the leader is already
dead.  This is OK, we shouldn't return the error in this case.

Remove this check and the comment.  Note that the "process" was found
under tasklist_lock, it must have ->sighand != NULL.

Also, remove a couple of unneeded initializations.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 34 +++++++---------------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index bd9c931b3659..60b262051d1d 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -460,9 +460,9 @@ sys_timer_create(const clockid_t which_clock,
 		 timer_t __user * created_timer_id)
 {
 	int error = 0;
-	struct k_itimer *new_timer = NULL;
+	struct k_itimer *new_timer;
 	int new_timer_id;
-	struct task_struct *process = NULL;
+	struct task_struct *process;
 	unsigned long flags;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
@@ -523,32 +523,12 @@ sys_timer_create(const clockid_t which_clock,
 
 		read_lock(&tasklist_lock);
 		if ((process = good_sigevent(&event))) {
-			/*
-			 * We may be setting up this process for another
-			 * thread.  It may be exiting.  To catch this
-			 * case the we check the PF_EXITING flag.  If
-			 * the flag is not set, the siglock will catch
-			 * him before it is too late (in exit_itimers).
-			 *
-			 * The exec case is a bit more invloved but easy
-			 * to code.  If the process is in our thread
-			 * group (and it must be or we would not allow
-			 * it here) and is doing an exec, it will cause
-			 * us to be killed.  In this case it will wait
-			 * for us to die which means we can finish this
-			 * linkage with our last gasp. I.e. no code :)
-			 */
+			get_task_struct(process);
 			spin_lock_irqsave(&process->sighand->siglock, flags);
-			if (!(process->flags & PF_EXITING)) {
-				get_task_struct(process);
-				new_timer->it_process = process;
-				list_add(&new_timer->list,
-					 &process->signal->posix_timers);
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-			} else {
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-				process = NULL;
-			}
+			new_timer->it_process = process;
+			list_add(&new_timer->list,
+				&process->signal->posix_timers);
+			spin_unlock_irqrestore(&process->sighand->siglock, flags);
 		}
 		read_unlock(&tasklist_lock);
 		if (!process) {
-- 
cgit v1.2.3-55-g7522


From 36b2f046000b358b62b9d116cb10a2b1c5be5cbf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:48 -0700
Subject: posix-timers: sys_timer_create: simplify and s/tasklist/rcu/

- Change the code to do rcu_read_lock() instead of taking tasklist_lock,
  it is safe to get_task_struct(p) if p was found under RCU.

  However, now we must not use process's sighand/signal, they may be NULL.
  We can use current->sighand/signal instead, this "process" must belong
  to the current's thread-group.

- Factor out the common code for 2 "if (timer_event_spec)" branches, the
  !timer_event_spec case can use current too.

- use spin_lock_irq() instead of _irqsave(), kill "flags".

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 60b262051d1d..5b761903b49a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -463,7 +463,6 @@ sys_timer_create(const clockid_t which_clock,
 	struct k_itimer *new_timer;
 	int new_timer_id;
 	struct task_struct *process;
-	unsigned long flags;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -521,16 +520,11 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_signo = event.sigev_signo;
 		new_timer->it_sigev_value = event.sigev_value;
 
-		read_lock(&tasklist_lock);
-		if ((process = good_sigevent(&event))) {
+		rcu_read_lock();
+		process = good_sigevent(&event);
+		if (process)
 			get_task_struct(process);
-			spin_lock_irqsave(&process->sighand->siglock, flags);
-			new_timer->it_process = process;
-			list_add(&new_timer->list,
-				&process->signal->posix_timers);
-			spin_unlock_irqrestore(&process->sighand->siglock, flags);
-		}
-		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		if (!process) {
 			error = -EINVAL;
 			goto out;
@@ -541,19 +535,18 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
 		get_task_struct(process);
-		spin_lock_irqsave(&process->sighand->siglock, flags);
-		new_timer->it_process = process;
-		list_add(&new_timer->list, &process->signal->posix_timers);
-		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 	}
 
+	spin_lock_irq(&current->sighand->siglock);
+	new_timer->it_process = process;
+	list_add(&new_timer->list, &current->signal->posix_timers);
+	spin_unlock_irq(&current->sighand->siglock);
  	/*
 	 * In the case of the timer belonging to another task, after
 	 * the task is unlocked, the timer is owned by the other task
 	 * and may cease to exist at any time.  Don't use or modify
 	 * new_timer after the unlock call.
 	 */
-
 out:
 	if (error)
 		release_posix_timer(new_timer, it_id_set);
-- 
cgit v1.2.3-55-g7522


From 717835d94d3e3d343a302df0a3cb9405887c3e2a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:49 -0700
Subject: posix-timers: move the initialization of timer->sigq from send to
 create path

posix_timer_event() always populates timer->sigq with the same numbers,
move this code into sys_timer_create().

Note that with this patch we can kill it_sigev_signo and it_sigev_value.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5b761903b49a..c459b29efdd4 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -312,11 +312,6 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	timr->sigq->info.si_signo = timr->it_sigev_signo;
-	timr->sigq->info.si_code = SI_TIMER;
-	timr->sigq->info.si_tid = timr->it_id;
-	timr->sigq->info.si_value = timr->it_sigev_value;
-
 	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
 	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
 	/* If we failed to send the signal the timer stops. */
@@ -537,6 +532,11 @@ sys_timer_create(const clockid_t which_clock,
 		get_task_struct(process);
 	}
 
+	new_timer->sigq->info.si_code  = SI_TIMER;
+	new_timer->sigq->info.si_tid   = new_timer->it_id;
+	new_timer->sigq->info.si_signo = new_timer->it_sigev_signo;
+	new_timer->sigq->info.si_value = new_timer->it_sigev_value;
+
 	spin_lock_irq(&current->sighand->siglock);
 	new_timer->it_process = process;
 	list_add(&new_timer->list, &current->signal->posix_timers);
-- 
cgit v1.2.3-55-g7522


From ef864c958801768fb28bd3603cd0b098b394671c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:49 -0700
Subject: posix-timers: sys_timer_create: cleanup the error handling

Cleanup.

- sys_timer_create() is big and complicated. The code above the "out:"
  label relies on the fact that "error" must be == 0. This is not very
  robust, make the code more explicit. Remove the unneeded initialization
  of error.

- If idr_get_new() succeeds (as it normally should), we check the returned
  value twice. Move the "-EAGAIN" check under "if (error)".

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index c459b29efdd4..7be385fe4eca 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -454,9 +454,8 @@ sys_timer_create(const clockid_t which_clock,
 		 struct sigevent __user *timer_event_spec,
 		 timer_t __user * created_timer_id)
 {
-	int error = 0;
 	struct k_itimer *new_timer;
-	int new_timer_id;
+	int error, new_timer_id;
 	struct task_struct *process;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
@@ -478,9 +477,9 @@ sys_timer_create(const clockid_t which_clock,
 	error = idr_get_new(&posix_timers_id, (void *) new_timer,
 			    &new_timer_id);
 	spin_unlock_irq(&idr_lock);
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error) {
+	if (error) {
+		if (error == -EAGAIN)
+			goto retry;
 		/*
 		 * Weird looking, but we return EAGAIN if the IDR is
 		 * full (proper POSIX return value for this)
@@ -541,6 +540,8 @@ sys_timer_create(const clockid_t which_clock,
 	new_timer->it_process = process;
 	list_add(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
+
+	return 0;
  	/*
 	 * In the case of the timer belonging to another task, after
 	 * the task is unlocked, the timer is owned by the other task
@@ -548,9 +549,7 @@ sys_timer_create(const clockid_t which_clock,
 	 * new_timer after the unlock call.
 	 */
 out:
-	if (error)
-		release_posix_timer(new_timer, it_id_set);
-
+	release_posix_timer(new_timer, it_id_set);
 	return error;
 }
 
-- 
cgit v1.2.3-55-g7522


From 5a9fa73072854981a5c05eb7ba18a96d49c2804f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:50 -0700
Subject: posix-timers: kill ->it_sigev_signo and ->it_sigev_value

With the recent changes ->it_sigev_signo and ->it_sigev_value are only
used in sys_timer_create(), kill them.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h |  2 --
 kernel/posix-timers.c        | 17 +++++++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index f9d8e9e94e9b..a7c721355549 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,8 +45,6 @@ struct k_itimer {
 	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
 	int it_sigev_notify;		/* notify word of sigevent struct */
-	int it_sigev_signo;		/* signo word of sigevent struct */
-	sigval_t it_sigev_value;	/* value word of sigevent struct */
 	struct task_struct *it_process;	/* process to send signal to */
 	struct sigqueue *sigq;		/* signal queue entry. */
 	union {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7be385fe4eca..3eff47b0d8d5 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -510,10 +510,6 @@ sys_timer_create(const clockid_t which_clock,
 			error = -EFAULT;
 			goto out;
 		}
-		new_timer->it_sigev_notify = event.sigev_notify;
-		new_timer->it_sigev_signo = event.sigev_signo;
-		new_timer->it_sigev_value = event.sigev_value;
-
 		rcu_read_lock();
 		process = good_sigevent(&event);
 		if (process)
@@ -524,17 +520,18 @@ sys_timer_create(const clockid_t which_clock,
 			goto out;
 		}
 	} else {
-		new_timer->it_sigev_notify = SIGEV_SIGNAL;
-		new_timer->it_sigev_signo = SIGALRM;
-		new_timer->it_sigev_value.sival_int = new_timer->it_id;
+		event.sigev_notify = SIGEV_SIGNAL;
+		event.sigev_signo = SIGALRM;
+		event.sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
 		get_task_struct(process);
 	}
 
-	new_timer->sigq->info.si_code  = SI_TIMER;
+	new_timer->it_sigev_notify     = event.sigev_notify;
+	new_timer->sigq->info.si_signo = event.sigev_signo;
+	new_timer->sigq->info.si_value = event.sigev_value;
 	new_timer->sigq->info.si_tid   = new_timer->it_id;
-	new_timer->sigq->info.si_signo = new_timer->it_sigev_signo;
-	new_timer->sigq->info.si_value = new_timer->it_sigev_value;
+	new_timer->sigq->info.si_code  = SI_TIMER;
 
 	spin_lock_irq(&current->sighand->siglock);
 	new_timer->it_process = process;
-- 
cgit v1.2.3-55-g7522


From 5a51b713ccf8835d5adf7217e2f86eb12b1ca851 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:51 -0700
Subject: posix-timers: lock_timer: kill the bogus ->it_id check

lock_timer() checks that the timer found by idr_find(timer_id) has ->it_id
== timer_id.  This buys nothing.  This check can fail only if
sys_timer_create() unlocked idr_lock after idr_get_new(), but didn't set
->it_id = new_timer_id yet.  But in that case ->it_process == NULL so
lock_timer() can't succeed anyway.

Also remove a couple of unneeded typecasts.

Note that with or without this patch we have a small problem. 
sys_timer_create() doesn't ensure that the result of setting (say)
->it_sigev_notify must be visible if lock_timer() succeeds.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 3eff47b0d8d5..7185f05d53a9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -474,8 +474,7 @@ sys_timer_create(const clockid_t which_clock,
 		goto out;
 	}
 	spin_lock_irq(&idr_lock);
-	error = idr_get_new(&posix_timers_id, (void *) new_timer,
-			    &new_timer_id);
+	error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id);
 	spin_unlock_irq(&idr_lock);
 	if (error) {
 		if (error == -EAGAIN)
@@ -567,12 +566,12 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	 */
 
 	spin_lock_irqsave(&idr_lock, *flags);
-	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
+	timr = idr_find(&posix_timers_id, (int) timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
 
-		if ((timr->it_id != timer_id) || !(timr->it_process) ||
-				!same_thread_group(timr->it_process, current)) {
+		if (!timr->it_process ||
+		    !same_thread_group(timr->it_process, current)) {
 			spin_unlock(&timr->it_lock);
 			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
-- 
cgit v1.2.3-55-g7522


From 31d9284569e38fb97117497af3e8047a6a3c86f0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Mon, 22 Sep 2008 14:42:51 -0700
Subject: posix-timers: lock_timer: make it readable

Cleanup.  Imho makes the code much more understandable.  At least this
patch lessens both the source and compiled code.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: mingo@elte.hu
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/posix-timers.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7185f05d53a9..95451bf7d2eb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -556,7 +556,7 @@ out:
  * the find to the timer lock.  To avoid a dead lock, the timer id MUST
  * be release with out holding the timer lock.
  */
-static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
 	/*
@@ -564,23 +564,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	 * flags part over to the timer lock.  Must not let interrupts in
 	 * while we are moving the lock.
 	 */
-
 	spin_lock_irqsave(&idr_lock, *flags);
-	timr = idr_find(&posix_timers_id, (int) timer_id);
+	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-
-		if (!timr->it_process ||
-		    !same_thread_group(timr->it_process, current)) {
-			spin_unlock(&timr->it_lock);
-			spin_unlock_irqrestore(&idr_lock, *flags);
-			timr = NULL;
-		} else
+		if (timr->it_process &&
+		    same_thread_group(timr->it_process, current)) {
 			spin_unlock(&idr_lock);
-	} else
-		spin_unlock_irqrestore(&idr_lock, *flags);
+			return timr;
+		}
+		spin_unlock(&timr->it_lock);
+	}
+	spin_unlock_irqrestore(&idr_lock, *flags);
 
-	return timr;
+	return NULL;
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From 1b02469088ac7a13d7e622b618b7410d0f1ce5ec Mon Sep 17 00:00:00 2001
From: Richard Kennedy
Date: Mon, 22 Sep 2008 14:42:43 -0700
Subject: hrtimer: reorder struct hrtimer to save 8 bytes on 64bit builds

reorder struct hrtimer to save 8 bytes on 64 bit builds when
CONFIG_TIMER_STATS selected.  (also removes 8 bytes from signal_struct)

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 68b0196d8696..8730b60c9432 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -115,12 +115,12 @@ struct hrtimer {
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
-	enum hrtimer_cb_mode		cb_mode;
 	struct list_head		cb_entry;
+	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
+	int				start_pid;
 	void				*start_site;
 	char				start_comm[16];
-	int				start_pid;
 #endif
 };
 
-- 
cgit v1.2.3-55-g7522


From eb3f938fd6292dc79f43a5fe14784b044776e9f0 Mon Sep 17 00:00:00 2001
From: Maciej W. Rozycki
Date: Mon, 22 Sep 2008 14:42:40 -0700
Subject: ntp: let update_persistent_clock() sleep

This is a change that makes the 11-minute RTC update be run in the process
context.  This is so that update_persistent_clock() can sleep, which may
be required for certain types of RTC hardware -- most notably I2C devices.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Brownell <david-b@pacbell.net>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/ntp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index c6921aa1a42a..450a45cb01c1 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -10,13 +10,13 @@
 
 #include <linux/mm.h>
 #include <linux/time.h>
-#include <linux/timer.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/hrtimer.h>
 #include <linux/capability.h>
 #include <linux/math64.h>
 #include <linux/clocksource.h>
+#include <linux/workqueue.h>
 #include <asm/timex.h>
 
 /*
@@ -218,11 +218,11 @@ void second_overflow(void)
 /* Disable the cmos update - used by virtualization and embedded */
 int no_sync_cmos_clock  __read_mostly;
 
-static void sync_cmos_clock(unsigned long dummy);
+static void sync_cmos_clock(struct work_struct *work);
 
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 
-static void sync_cmos_clock(unsigned long dummy)
+static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec now, next;
 	int fail = 1;
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy)
 		next.tv_sec++;
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
-	mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next));
+	schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
 static void notify_cmos_timer(void)
 {
 	if (!no_sync_cmos_clock)
-		mod_timer(&sync_cmos_timer, jiffies + 1);
+		schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
-- 
cgit v1.2.3-55-g7522


From 5cd1c9c5cf30d4b33df3d3f74d8142f278d536b7 Mon Sep 17 00:00:00 2001
From: Roman Zippel
Date: Mon, 22 Sep 2008 14:42:43 -0700
Subject: timekeeping: fix rounding problem during clock update

Due to a rounding problem during a clock update it's possible for readers
to observe the clock jumping back by 1nsec.  The following simplified
example demonstrates the problem:

cycle	xtime
0	0
1000	999999.6
2000	1999999.2
3000	2999998.8
...

1500 =	1499999.4
=	0.0 + 1499999.4
=	999999.6 + 499999.8

When reading the clock only the full nanosecond part is used, while
timekeeping internally keeps nanosecond fractions.  If the clock is now
updated at cycle 1500 here, a nanosecond is missing due to the truncation.

The simple fix is to round up the xtime value during the update, this also
changes the distance to the reference time, but the adjustment will
automatically take care that it stays under control.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c9..5ecbfc39a268 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -454,7 +454,7 @@ void update_wall_time(void)
 #else
 	offset = clock->cycle_interval;
 #endif
-	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+	clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
@@ -479,9 +479,12 @@ void update_wall_time(void)
 	/* correct the clock when NTP error is too big */
 	clocksource_adjust(offset);
 
-	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
+	/* store full nanoseconds into xtime after rounding it up and
+	 * add the remainder to the error difference.
+	 */
+	xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
+	clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
 
 	update_xtime_cache(cyc2ns(clock, offset));
 
-- 
cgit v1.2.3-55-g7522


From d40e944c25fb4642adb2a4c580a48218a9f3f824 Mon Sep 17 00:00:00 2001
From: Roman Zippel
Date: Mon, 22 Sep 2008 14:42:44 -0700
Subject: ntp: improve adjtimex frequency rounding

Change PPM_SCALE_INV_SHIFT so that it doesn't throw away any input bits
(19 is the amount of the factor 2 in PPM_SCALE), the output frequency
can then be calculated back to its input value, as the inverse divide
produce a slightly larger value, which is then correctly rounded by the
final shift.

Reported-by: Martin Ziegler <ziegler@uni-freiburg.de>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 2 +-
 kernel/time/ntp.c     | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index c00bcdd3ae42..9007313b5b71 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -82,7 +82,7 @@
  */
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
 #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 20
+#define PPM_SCALE_INV_SHIFT 19
 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
 		       PPM_SCALE + 1)
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 450a45cb01c1..ddb0465a6baa 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -406,9 +406,8 @@ adj_done:
 	if (time_status & (STA_UNSYNC|STA_CLOCKERR))
 		result = TIME_ERROR;
 
-	txc->freq	   = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
-					 (s64)PPM_SCALE_INV,
-					 NTP_SCALE_SHIFT);
+	txc->freq	   = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
+					 (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
 	txc->maxerror	   = time_maxerror;
 	txc->esterror	   = time_esterror;
 	txc->status	   = time_status;
-- 
cgit v1.2.3-55-g7522


From 77cd62e8082b9743b59ee1946a4c3ee2e3cd2bce Mon Sep 17 00:00:00 2001
From: Timur Tabi
Date: Fri, 26 Sep 2008 17:00:11 -0700
Subject: fsldma: allow Freescale Elo DMA driver to be compiled as a module

Modify the Freescale Elo / Elo Plus DMA driver so that it can be compiled as
a module.

The primary change is to stop treating the DMA controller as a bus, and the
DMA channels as devices on the bus.  This is because the Open Firmware (OF)
kernel code does not allow busses to be removed, so although we can call
of_platform_bus_probe() to probe the DMA channels, there is no
of_platform_bus_remove().  Instead, the DMA channels are manually probed,
similar to what fsl_elbc_nand.c does.

Cc: Scott Wood <scottwood@freescale.com>
Acked-by: Li Yang <leoli@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/Kconfig  |  10 ++--
 drivers/dma/fsldma.c | 138 ++++++++++++++++++++++++++++++++-------------------
 drivers/dma/fsldma.h |   1 +
 3 files changed, 94 insertions(+), 55 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index cd303901eb5b..904e57558bb5 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -48,13 +48,13 @@ config DW_DMAC
 	  can be integrated in chips such as the Atmel AT32ap7000.
 
 config FSL_DMA
-	bool "Freescale MPC85xx/MPC83xx DMA support"
-	depends on PPC
+	tristate "Freescale Elo and Elo Plus DMA support"
+	depends on FSL_SOC
 	select DMA_ENGINE
 	---help---
-	  Enable support for the Freescale DMA engine. Now, it support
-	  MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
-	  The MPC8349, MPC8360 is also supported.
+	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
+	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
+	  Elo Plus is the DMA controller on 85xx and 86xx parts.
 
 config MV_XOR
 	bool "Marvell XOR engine support"
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e9b263897c03..0b95dcce447e 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -370,7 +370,10 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
 					struct dma_client *client)
 {
 	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
-	LIST_HEAD(tmp_list);
+
+	/* Has this channel already been allocated? */
+	if (fsl_chan->desc_pool)
+		return 1;
 
 	/* We need the descriptor to be aligned to 32bytes
 	 * for meeting FSL DMA specification requirement.
@@ -410,6 +413,8 @@ static void fsl_dma_free_chan_resources(struct dma_chan *chan)
 	}
 	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 	dma_pool_destroy(fsl_chan->desc_pool);
+
+	fsl_chan->desc_pool = NULL;
 }
 
 static struct dma_async_tx_descriptor *
@@ -786,33 +791,29 @@ static void dma_do_tasklet(unsigned long data)
 	fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
-			const struct of_device_id *match)
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
 {
-	struct fsl_dma_device *fdev;
 	struct fsl_dma_chan *new_fsl_chan;
 	int err;
 
-	fdev = dev_get_drvdata(dev->dev.parent);
-	BUG_ON(!fdev);
-
 	/* alloc channel */
 	new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
 	if (!new_fsl_chan) {
-		dev_err(&dev->dev, "No free memory for allocating "
+		dev_err(fdev->dev, "No free memory for allocating "
 				"dma channels!\n");
 		return -ENOMEM;
 	}
 
 	/* get dma channel register base */
-	err = of_address_to_resource(dev->node, 0, &new_fsl_chan->reg);
+	err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
 	if (err) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				dev->node->full_name);
+		dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+				node->full_name);
 		goto err_no_reg;
 	}
 
-	new_fsl_chan->feature = *(u32 *)match->data;
+	new_fsl_chan->feature = feature;
 
 	if (!fdev->feature)
 		fdev->feature = new_fsl_chan->feature;
@@ -822,13 +823,13 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 	 */
 	WARN_ON(fdev->feature != new_fsl_chan->feature);
 
-	new_fsl_chan->dev = &dev->dev;
+	new_fsl_chan->dev = &new_fsl_chan->common.dev;
 	new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
 			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
 	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
 	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
-		dev_err(&dev->dev, "There is no %d channel!\n",
+		dev_err(fdev->dev, "There is no %d channel!\n",
 				new_fsl_chan->id);
 		err = -EINVAL;
 		goto err_no_chan;
@@ -862,20 +863,20 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
 			&fdev->common.channels);
 	fdev->common.chancnt++;
 
-	new_fsl_chan->irq = irq_of_parse_and_map(dev->node, 0);
+	new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
 	if (new_fsl_chan->irq != NO_IRQ) {
 		err = request_irq(new_fsl_chan->irq,
 					&fsl_dma_chan_do_interrupt, IRQF_SHARED,
 					"fsldma-channel", new_fsl_chan);
 		if (err) {
-			dev_err(&dev->dev, "DMA channel %s request_irq error "
-				"with return %d\n", dev->node->full_name, err);
+			dev_err(fdev->dev, "DMA channel %s request_irq error "
+				"with return %d\n", node->full_name, err);
 			goto err_no_irq;
 		}
 	}
 
-	dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
-				match->compatible, new_fsl_chan->irq);
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+				compatible, new_fsl_chan->irq);
 
 	return 0;
 
@@ -888,38 +889,20 @@ err_no_reg:
 	return err;
 }
 
-const u32 mpc8540_dma_ip_feature = FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN;
-const u32 mpc8349_dma_ip_feature = FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN;
-
-static struct of_device_id of_fsl_dma_chan_ids[] = {
-	{
-		.compatible = "fsl,eloplus-dma-channel",
-		.data = (void *)&mpc8540_dma_ip_feature,
-	},
-	{
-		.compatible = "fsl,elo-dma-channel",
-		.data = (void *)&mpc8349_dma_ip_feature,
-	},
-	{}
-};
-
-static struct of_platform_driver of_fsl_dma_chan_driver = {
-	.name = "of-fsl-dma-channel",
-	.match_table = of_fsl_dma_chan_ids,
-	.probe = of_fsl_dma_chan_probe,
-};
-
-static __init int of_fsl_dma_chan_init(void)
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
 {
-	return of_register_platform_driver(&of_fsl_dma_chan_driver);
+	free_irq(fchan->irq, fchan);
+	list_del(&fchan->common.device_node);
+	iounmap(fchan->reg_base);
+	kfree(fchan);
 }
 
 static int __devinit of_fsl_dma_probe(struct of_device *dev,
 			const struct of_device_id *match)
 {
 	int err;
-	unsigned int irq;
 	struct fsl_dma_device *fdev;
+	struct device_node *child;
 
 	fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
 	if (!fdev) {
@@ -953,9 +936,9 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.dev = &dev->dev;
 
-	irq = irq_of_parse_and_map(dev->node, 0);
-	if (irq != NO_IRQ) {
-		err = request_irq(irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+	fdev->irq = irq_of_parse_and_map(dev->node, 0);
+	if (fdev->irq != NO_IRQ) {
+		err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
 					"fsldma-device", fdev);
 		if (err) {
 			dev_err(&dev->dev, "DMA device request_irq error "
@@ -965,7 +948,21 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	}
 
 	dev_set_drvdata(&(dev->dev), fdev);
-	of_platform_bus_probe(dev->node, of_fsl_dma_chan_ids, &dev->dev);
+
+	/* We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(dev->node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+	}
 
 	dma_async_device_register(&fdev->common);
 	return 0;
@@ -977,6 +974,30 @@ err_no_reg:
 	return err;
 }
 
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+	struct fsl_dma_device *fdev;
+	unsigned int i;
+
+	fdev = dev_get_drvdata(&of_dev->dev);
+
+	dma_async_device_unregister(&fdev->common);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+		if (fdev->chan[i])
+			fsl_dma_chan_remove(fdev->chan[i]);
+
+	if (fdev->irq != NO_IRQ)
+		free_irq(fdev->irq, fdev);
+
+	iounmap(fdev->reg_base);
+
+	kfree(fdev);
+	dev_set_drvdata(&of_dev->dev, NULL);
+
+	return 0;
+}
+
 static struct of_device_id of_fsl_dma_ids[] = {
 	{ .compatible = "fsl,eloplus-dma", },
 	{ .compatible = "fsl,elo-dma", },
@@ -984,15 +1005,32 @@ static struct of_device_id of_fsl_dma_ids[] = {
 };
 
 static struct of_platform_driver of_fsl_dma_driver = {
-	.name = "of-fsl-dma",
+	.name = "fsl-elo-dma",
 	.match_table = of_fsl_dma_ids,
 	.probe = of_fsl_dma_probe,
+	.remove = of_fsl_dma_remove,
 };
 
 static __init int of_fsl_dma_init(void)
 {
-	return of_register_platform_driver(&of_fsl_dma_driver);
+	int ret;
+
+	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+	ret = of_register_platform_driver(&of_fsl_dma_driver);
+	if (ret)
+		pr_err("fsldma: failed to register platform driver\n");
+
+	return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+	of_unregister_platform_driver(&of_fsl_dma_driver);
 }
 
-subsys_initcall(of_fsl_dma_chan_init);
 subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 6faf07ba0d0e..4f21a512d848 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -114,6 +114,7 @@ struct fsl_dma_device {
 	struct dma_device common;
 	struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
 	u32 feature;		/* The same as DMA channels */
+	int irq;		/* Channel IRQ */
 };
 
 /* Define macros for fsl_dma_chan->feature property */
-- 
cgit v1.2.3-55-g7522


From 7086efe1c1536f6bc160e7d60a9bfd645b91f279 Mon Sep 17 00:00:00 2001
From: Frank Mayhar
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: timers: fix itimer/many thread hang, v3

- fix UP lockup
- another set of UP/SMP cleanups and simplifications

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   1 -
 kernel/sched.c        |   1 -
 kernel/sched_stats.h  | 126 +++++++++++++++-----------------------------------
 3 files changed, 38 insertions(+), 90 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b982fb48c8f0..23d9d5464544 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2134,7 +2134,6 @@ static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 	return thread_group_cputime_alloc(curr);
 }
 
-
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
 	free_percpu(sig->cputime.totals);
diff --git a/kernel/sched.c b/kernel/sched.c
index 260c22cc530a..29a3152c45db 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,7 +4046,6 @@ unsigned long long task_delta_exec(struct task_struct *p)
 	unsigned long flags;
 	u64 ns = 0;
 
-	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index d6903bd0c7a8..b8c156979cf2 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -276,133 +276,83 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
  * on CONFIG_SCHEDSTATS.
  */
 
-#ifdef CONFIG_SMP
-
 /**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
+ * account_group_user_time - Maintain utime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the utime field of that
- *		structure.
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the utime field of the
+ *		thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the utime field there.
  */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
+static inline void account_group_user_time(struct task_struct *tsk,
+					   cputime_t cputime)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->utime = cputime_add(times->utime, cputime);
 		put_cpu_no_resched();
 	}
 }
 
 /**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
+ * account_group_system_time - Maintain stime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the stime field of that
- *		structure.
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the stime field of the
+ *		thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the stime field there.
  */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
+static inline void account_group_system_time(struct task_struct *tsk,
+					     cputime_t cputime)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->stime = cputime_add(times->stime, cputime);
 		put_cpu_no_resched();
 	}
 }
 
 /**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *						thread group.
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
  *
- * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @tsk:	Pointer to task structure.
  * @ns:		Time value by which to increment the sum_exec_runtime field
- *		of that structure.
+ *		of the thread_group_cputime structure.
  *
  * If thread group time is being maintained, get the structure for the
  * running CPU and update the sum_exec_runtime field there.
  */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
 {
-	if (tgtimes->totals) {
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (unlikely(!sig))
+		return;
+	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times = per_cpu_ptr(sig->cputime.totals, get_cpu());
 		times->sum_exec_runtime += ns;
 		put_cpu_no_resched();
 	}
 }
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals->sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-/*
- * These are the generic time-accounting routines that use the above
- * functions.  They are the functions actually called by the scheduler.
- */
-static inline void account_group_user_time(struct task_struct *tsk,
-					    cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					      cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					       unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
-}
-- 
cgit v1.2.3-55-g7522


From 1508487e7f16d992ad23cabd3712563ff912f413 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 30 Sep 2008 08:28:17 +0200
Subject: timers: fix itimer/many thread hang, fix

fix bogus rq dereference: v3 removed the locking but also removed the rq
initialization.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 29a3152c45db..ebb03def564b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4042,10 +4042,12 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
-	struct rq *rq;
 	unsigned long flags;
+	struct rq *rq;
 	u64 ns = 0;
 
+	rq = task_rq_lock(p, &flags);
+
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
@@ -4055,6 +4057,8 @@ unsigned long long task_delta_exec(struct task_struct *p)
 			ns = delta_exec;
 	}
 
+	task_rq_unlock(rq, &flags);
+
 	return ns;
 }
 
-- 
cgit v1.2.3-55-g7522


From 0c5d1eb77a8be917b638344a22afe1398236482b Mon Sep 17 00:00:00 2001
From: David Brownell
Date: Wed, 1 Oct 2008 14:46:18 -0700
Subject: genirq: record trigger type

Genirq hasn't previously recorded the trigger type used by any given IRQ,
although some irq_chip support has done so.  That data can be useful when
troubleshooting.  This patch records it in the relevant irq_desc.status
bits, and improves consistency between the two driver-visible calls
affected:

 - Make set_irq_type() usage match request_irq() usage:
    * IRQ_TYPE_NONE should be a NOP; succeed, so irq_chip methods
      won't have to handle that case any more (many do it wrong).
    * IRQ_TYPE_PROBE is ignored; any buggy out-of-tree callers
      might need to switch over to the real IRQ probing code.
    * emit the same diagnostics (from shared utility code)

 - Their kerneldoc now reflects usage:
    * request_irq() flags include IRQF_TRIGGER_* to specify
      active edge(s)/level ... docs previously omitted that
    * set_irq_type() is declared in <linux/irq.h> so callers
      should use the (bit-equivalent) IRQ_TYPE_* symbols there

Also: adds a warning about shared IRQs that don't end up using the
requested trigger mode; and fix an unrelated "sparse" warning.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c      | 15 ++++++++-------
 kernel/irq/internals.h |  3 +++
 kernel/irq/manage.c    | 21 ++++++++++++++++++---
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d663338cb4a8..5203a599d211 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -111,9 +111,9 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 EXPORT_SYMBOL(set_irq_chip);
 
 /**
- *	set_irq_type - set the irq type for an irq
+ *	set_irq_type - set the irq trigger type for an irq
  *	@irq:	irq number
- *	@type:	interrupt type - see include/linux/interrupt.h
+ *	@type:	IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
@@ -127,11 +127,12 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	}
 
 	desc = irq_desc + irq;
-	if (desc->chip->set_type) {
-		spin_lock_irqsave(&desc->lock, flags);
-		ret = desc->chip->set_type(irq, type);
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
+	if (type == IRQ_TYPE_NONE)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	ret = __irq_set_trigger(desc, irq, flags);
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 08a849a22447..422dd00c8bd3 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -10,6 +10,9 @@ extern void irq_chip_set_defaults(struct irq_chip *chip);
 /* Set default handler: */
 extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 
+extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
+		unsigned long flags);
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d62f69ba7453..e59157b591f8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -216,7 +216,7 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-int set_irq_wake_real(unsigned int irq, unsigned int on)
+static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	int ret = -ENXIO;
@@ -305,10 +305,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
 		desc->handle_irq = NULL;
 }
 
-static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags)
 {
 	int ret;
+	struct irq_chip *chip = desc->chip;
 
 	if (!chip || !chip->set_type) {
 		/*
@@ -326,6 +327,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
 		pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
 				(int)(flags & IRQF_TRIGGER_MASK),
 				irq, chip->set_type);
+	else {
+		/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
+		desc->status &= ~IRQ_TYPE_SENSE_MASK;
+		desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+	}
 
 	return ret;
 }
@@ -404,7 +410,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
-			ret = __irq_set_trigger(desc->chip, irq, new->flags);
+			ret = __irq_set_trigger(desc, irq, new->flags);
 
 			if (ret) {
 				spin_unlock_irqrestore(&desc->lock, flags);
@@ -430,6 +436,14 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
 		/* Set default affinity mask once everything is setup */
 		irq_select_affinity(irq);
+
+	} else if ((new->flags & IRQF_TRIGGER_MASK)
+			&& (new->flags & IRQF_TRIGGER_MASK)
+				!= (desc->status & IRQ_TYPE_SENSE_MASK)) {
+		/* hope the handler works with the actual trigger mode... */
+		pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
+				irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
+				(int)(new->flags & IRQF_TRIGGER_MASK));
 	}
 
 	*p = new;
@@ -586,6 +600,7 @@ EXPORT_SYMBOL(free_irq);
  *	IRQF_SHARED		Interrupt is shared
  *	IRQF_DISABLED	Disable local interrupts while processing
  *	IRQF_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *	IRQF_TRIGGER_*		Specify active edge(s) or level
  *
  */
 int request_irq(unsigned int irq, irq_handler_t handler,
-- 
cgit v1.2.3-55-g7522


From 98f172b257725de516a792b810b08639d6293d4d Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 16:21:19 -0400
Subject: parisc: parisc-agp - fix <asm-parisc/*> -> <asm/*>

---
 drivers/char/agp/parisc-agp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index 8c42dcc5958c..f4bbcb273208 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -20,8 +20,8 @@
 #include <linux/agp_backend.h>
 #include <linux/log2.h>
 
-#include <asm-parisc/parisc-device.h>
-#include <asm-parisc/ropes.h>
+#include <asm/parisc-device.h>
+#include <asm/ropes.h>
 
 #include "agp.h"
 
-- 
cgit v1.2.3-55-g7522


From 40b10fc58a62a9698f4fa1916e61b0953139928a Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 16:22:34 -0400
Subject: parisc: ropes.h - fix <asm-parisc/*> -> <asm/*>

---
 include/asm-parisc/ropes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-parisc/ropes.h b/include/asm-parisc/ropes.h
index 007a880615eb..09f51d5ab57c 100644
--- a/include/asm-parisc/ropes.h
+++ b/include/asm-parisc/ropes.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_PARISC_ROPES_H_
 #define _ASM_PARISC_ROPES_H_
 
-#include <asm-parisc/parisc-device.h>
+#include <asm/parisc-device.h>
 
 #ifdef CONFIG_64BIT
 /* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
-- 
cgit v1.2.3-55-g7522


From dae2cdf3e25bf1c63f8012ae19c133e3b3b187ca Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 21:14:50 -0400
Subject: parisc: add arch/parisc/kernel/.gitignore

---
 arch/parisc/kernel/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 arch/parisc/kernel/.gitignore

diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/parisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
-- 
cgit v1.2.3-55-g7522


From 1e22166c40a99fb25fa6ff4f711a3217d848dd85 Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 21:17:23 -0400
Subject: parisc: unify CCIO_COLLECT_STATS implementation

Make it behave in the same manner as SBA_COLLECT_STATS, further
clean ups pending.
---
 drivers/parisc/ccio-dma.c | 43 +++++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f3a50d..dcc1e9958d2f 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -66,15 +66,8 @@
 #undef DEBUG_CCIO_RUN_SG
 
 #ifdef CONFIG_PROC_FS
-/*
- * CCIO_SEARCH_TIME can help measure how fast the bitmap search is.
- * impacts performance though - ditch it if you don't use it.
- */
-#define CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
-#else
-#undef CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
+/* depends on proc fs support. But costs CPU performance. */
+#undef CCIO_COLLECT_STATS
 #endif
 
 #include <linux/proc_fs.h>
@@ -239,12 +232,10 @@ struct ioc {
 	u32 res_size;		    	/* size of resource map in bytes */
 	spinlock_t res_lock;
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 #define CCIO_SEARCH_SAMPLE 0x100
 	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
 	unsigned long avg_idx;		  /* current index into avg_search */
-#endif
-#ifdef CCIO_MAP_STATS
 	unsigned long used_pages;
 	unsigned long msingle_calls;
 	unsigned long msingle_pages;
@@ -351,7 +342,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 	unsigned int pages_needed = size >> IOVP_SHIFT;
 	unsigned int res_idx;
 	unsigned long boundary_size;
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	unsigned long cr_start = mfctl(16);
 #endif
 	
@@ -406,7 +397,7 @@ resource_found:
 	DBG_RES("%s() res_idx %d res_hint: %d\n",
 		__func__, res_idx, ioc->res_hint);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 	{
 		unsigned long cr_end = mfctl(16);
 		unsigned long tmp = cr_end - cr_start;
@@ -416,7 +407,7 @@ resource_found:
 	ioc->avg_search[ioc->avg_idx++] = cr_start;
 	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages += pages_needed;
 #endif
 	/* 
@@ -452,7 +443,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
 	DBG_RES("%s():  res_idx: %d pages_mapped %d\n", 
 		__func__, res_idx, pages_mapped);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->used_pages -= pages_mapped;
 #endif
 
@@ -764,7 +755,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
 	size = ALIGN(size + offset, IOVP_SIZE);
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msingle_calls++;
 	ioc->msingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -828,7 +819,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
 
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usingle_calls++;
 	ioc->usingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -894,7 +885,7 @@ ccio_free_consistent(struct device *dev, size_t size, void *cpu_addr,
 */
 #define PIDE_FLAG 0x80000000UL
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 #define IOMMU_MAP_STATS
 #endif
 #include "iommu-helpers.h"
@@ -938,7 +929,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->msg_calls++;
 #endif
 
@@ -997,13 +988,13 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	DBG_RUN_SG("%s() START %d entries,  %08lx,%x\n",
 		__func__, nents, sg_virt_addr(sglist), sglist->length);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 	ioc->usg_calls++;
 #endif
 
 	while(sg_dma_len(sglist) && nents--) {
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
 #endif
 		ccio_unmap_single(dev, sg_dma_address(sglist),
@@ -1048,7 +1039,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "IO PDIR size    : %d bytes (%d entries)\n",
 			       total_pages * 8, total_pages);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "IO PDIR entries : %ld free  %ld used (%d%%)\n",
 				  total_pages - ioc->used_pages, ioc->used_pages,
 				  (int)(ioc->used_pages * 100 / total_pages));
@@ -1057,7 +1048,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 
 				  ioc->res_size, total_pages);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 		min = max = ioc->avg_search[0];
 		for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
 			avg += ioc->avg_search[j];
@@ -1070,7 +1061,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
 				  min, avg, max);
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 		len += seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
 				  ioc->msingle_calls, ioc->msingle_pages,
 				  (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1088,7 +1079,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		len += seq_printf(m, "pci_unmap_sg()  : %8ld calls  %8ld pages (avg %d/1000)\n\n\n",
 				  ioc->usg_calls, ioc->usg_pages,
 				  (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
-#endif	/* CCIO_MAP_STATS */
+#endif	/* CCIO_COLLECT_STATS */
 
 		ioc = ioc->next;
 	}
-- 
cgit v1.2.3-55-g7522


From 6c86cb8237bf08443806089130dc108051569a93 Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 22:52:18 -0400
Subject: parisc: move pdc_result to real2.S

---
 arch/parisc/kernel/asm-offsets.c |  3 +++
 arch/parisc/kernel/firmware.c    |  4 ++--
 arch/parisc/kernel/real2.S       | 12 ++++++++++++
 include/asm-parisc/pdc.h         |  3 +++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4ff..699cf8ef2118 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+	BLANK();
+	DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+	BLANK();
 	return 0;
 }
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7f..99a9e505edf9 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
 #include <asm/processor.h>	/* for boot_cpu_data */
 
 static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 7a92695d95a6..5f3d3a1f9037 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -8,12 +8,24 @@
  *
  */
 
+#include <asm/pdc.h>
 #include <asm/psw.h>
 #include <asm/assembly.h>
+#include <asm/asm-offsets.h>
 
 #include <linux/linkage.h>
 
+
 	.section	.bss
+
+	.export pdc_result
+	.export pdc_result2
+	.align 8
+pdc_result:
+	.block	ASM_PDC_RESULT_SIZE
+pdc_result2:
+	.block	ASM_PDC_RESULT_SIZE
+
 	.export real_stack
 	.export real32_stack
 	.export real64_stack
diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h
index 9eaa794c3e4a..46b75f9cce51 100644
--- a/include/asm-parisc/pdc.h
+++ b/include/asm-parisc/pdc.h
@@ -332,6 +332,9 @@
 #define BOOT_CONSOLE_SPA_OFFSET  0x3c4
 #define BOOT_CONSOLE_PATH_OFFSET 0x3a8
 
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT	32
+
 #if !defined(__ASSEMBLY__)
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3-55-g7522


From deae26bf6a10e47983606f5df080b91e97650ead Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Mon, 28 Jul 2008 23:02:13 -0400
Subject: parisc: move include/asm-parisc to arch/parisc/include/asm

---
 arch/parisc/include/asm/Kbuild               |   3 +
 arch/parisc/include/asm/a.out.h              |  20 +
 arch/parisc/include/asm/agp.h                |  24 +
 arch/parisc/include/asm/asmregs.h            | 183 +++++
 arch/parisc/include/asm/assembly.h           | 519 ++++++++++++++
 arch/parisc/include/asm/atomic.h             | 348 ++++++++++
 arch/parisc/include/asm/auxvec.h             |   4 +
 arch/parisc/include/asm/bitops.h             | 239 +++++++
 arch/parisc/include/asm/bug.h                |  92 +++
 arch/parisc/include/asm/bugs.h               |  19 +
 arch/parisc/include/asm/byteorder.h          |  82 +++
 arch/parisc/include/asm/cache.h              |  60 ++
 arch/parisc/include/asm/cacheflush.h         | 121 ++++
 arch/parisc/include/asm/checksum.h           | 210 ++++++
 arch/parisc/include/asm/compat.h             | 165 +++++
 arch/parisc/include/asm/compat_rt_sigframe.h |  50 ++
 arch/parisc/include/asm/compat_signal.h      |   2 +
 arch/parisc/include/asm/compat_ucontext.h    |  17 +
 arch/parisc/include/asm/cputime.h            |   6 +
 arch/parisc/include/asm/current.h            |  15 +
 arch/parisc/include/asm/delay.h              |  43 ++
 arch/parisc/include/asm/device.h             |   7 +
 arch/parisc/include/asm/div64.h              |   1 +
 arch/parisc/include/asm/dma-mapping.h        | 253 +++++++
 arch/parisc/include/asm/dma.h                | 186 +++++
 arch/parisc/include/asm/eisa_bus.h           |  23 +
 arch/parisc/include/asm/eisa_eeprom.h        | 153 +++++
 arch/parisc/include/asm/elf.h                | 342 +++++++++
 arch/parisc/include/asm/emergency-restart.h  |   6 +
 arch/parisc/include/asm/errno.h              | 124 ++++
 arch/parisc/include/asm/fb.h                 |  19 +
 arch/parisc/include/asm/fcntl.h              |  39 ++
 arch/parisc/include/asm/fixmap.h             |  30 +
 arch/parisc/include/asm/floppy.h             | 271 ++++++++
 arch/parisc/include/asm/futex.h              |  77 +++
 arch/parisc/include/asm/grfioctl.h           | 113 +++
 arch/parisc/include/asm/hardirq.h            |  29 +
 arch/parisc/include/asm/hardware.h           | 127 ++++
 arch/parisc/include/asm/hw_irq.h             |   8 +
 arch/parisc/include/asm/ide.h                |  61 ++
 arch/parisc/include/asm/io.h                 | 293 ++++++++
 arch/parisc/include/asm/ioctl.h              |  44 ++
 arch/parisc/include/asm/ioctls.h             |  90 +++
 arch/parisc/include/asm/ipcbuf.h             |  27 +
 arch/parisc/include/asm/irq.h                |  57 ++
 arch/parisc/include/asm/irq_regs.h           |   1 +
 arch/parisc/include/asm/kdebug.h             |   1 +
 arch/parisc/include/asm/kmap_types.h         |  30 +
 arch/parisc/include/asm/led.h                |  42 ++
 arch/parisc/include/asm/linkage.h            |  31 +
 arch/parisc/include/asm/local.h              |   1 +
 arch/parisc/include/asm/machdep.h            |  16 +
 arch/parisc/include/asm/mc146818rtc.h        |   9 +
 arch/parisc/include/asm/mckinley.h           |   9 +
 arch/parisc/include/asm/mman.h               |  61 ++
 arch/parisc/include/asm/mmu.h                |   7 +
 arch/parisc/include/asm/mmu_context.h        |  75 ++
 arch/parisc/include/asm/mmzone.h             |  73 ++
 arch/parisc/include/asm/module.h             |  32 +
 arch/parisc/include/asm/msgbuf.h             |  37 +
 arch/parisc/include/asm/mutex.h              |   9 +
 arch/parisc/include/asm/page.h               | 173 +++++
 arch/parisc/include/asm/param.h              |  22 +
 arch/parisc/include/asm/parisc-device.h      |  64 ++
 arch/parisc/include/asm/parport.h            |  18 +
 arch/parisc/include/asm/pci.h                | 294 ++++++++
 arch/parisc/include/asm/pdc.h                | 760 ++++++++++++++++++++
 arch/parisc/include/asm/pdc_chassis.h        | 381 ++++++++++
 arch/parisc/include/asm/pdcpat.h             | 308 +++++++++
 arch/parisc/include/asm/percpu.h             |   7 +
 arch/parisc/include/asm/perf.h               |  74 ++
 arch/parisc/include/asm/pgalloc.h            | 149 ++++
 arch/parisc/include/asm/pgtable.h            | 508 ++++++++++++++
 arch/parisc/include/asm/poll.h               |   1 +
 arch/parisc/include/asm/posix_types.h        | 129 ++++
 arch/parisc/include/asm/prefetch.h           |  39 ++
 arch/parisc/include/asm/processor.h          | 357 ++++++++++
 arch/parisc/include/asm/psw.h                |  62 ++
 arch/parisc/include/asm/ptrace.h             |  58 ++
 arch/parisc/include/asm/real.h               |   5 +
 arch/parisc/include/asm/resource.h           |   7 +
 arch/parisc/include/asm/ropes.h              | 322 +++++++++
 arch/parisc/include/asm/rt_sigframe.h        |  23 +
 arch/parisc/include/asm/rtc.h                | 131 ++++
 arch/parisc/include/asm/runway.h             |  12 +
 arch/parisc/include/asm/scatterlist.h        |  27 +
 arch/parisc/include/asm/sections.h           |  12 +
 arch/parisc/include/asm/segment.h            |   6 +
 arch/parisc/include/asm/sembuf.h             |  29 +
 arch/parisc/include/asm/serial.h             |  10 +
 arch/parisc/include/asm/setup.h              |   6 +
 arch/parisc/include/asm/shmbuf.h             |  58 ++
 arch/parisc/include/asm/shmparam.h           |   8 +
 arch/parisc/include/asm/sigcontext.h         |  20 +
 arch/parisc/include/asm/siginfo.h            |  14 +
 arch/parisc/include/asm/signal.h             | 153 +++++
 arch/parisc/include/asm/smp.h                |  68 ++
 arch/parisc/include/asm/socket.h             |  62 ++
 arch/parisc/include/asm/sockios.h            |  13 +
 arch/parisc/include/asm/spinlock.h           | 194 ++++++
 arch/parisc/include/asm/spinlock_types.h     |  21 +
 arch/parisc/include/asm/stat.h               | 100 +++
 arch/parisc/include/asm/statfs.h             |  58 ++
 arch/parisc/include/asm/string.h             |  10 +
 arch/parisc/include/asm/superio.h            |  85 +++
 arch/parisc/include/asm/system.h             | 182 +++++
 arch/parisc/include/asm/termbits.h           | 200 ++++++
 arch/parisc/include/asm/termios.h            |  90 +++
 arch/parisc/include/asm/thread_info.h        |  74 ++
 arch/parisc/include/asm/timex.h              |  20 +
 arch/parisc/include/asm/tlb.h                |  27 +
 arch/parisc/include/asm/tlbflush.h           |  80 +++
 arch/parisc/include/asm/topology.h           |   6 +
 arch/parisc/include/asm/traps.h              |  16 +
 arch/parisc/include/asm/types.h              |  36 +
 arch/parisc/include/asm/uaccess.h            | 244 +++++++
 arch/parisc/include/asm/ucontext.h           |  12 +
 arch/parisc/include/asm/unaligned.h          |  16 +
 arch/parisc/include/asm/unistd.h             | 991 +++++++++++++++++++++++++++
 arch/parisc/include/asm/unwind.h             |  77 +++
 arch/parisc/include/asm/user.h               |   5 +
 arch/parisc/include/asm/vga.h                |   6 +
 arch/parisc/include/asm/xor.h                |   1 +
 include/asm-parisc/Kbuild                    |   3 -
 include/asm-parisc/a.out.h                   |  20 -
 include/asm-parisc/agp.h                     |  24 -
 include/asm-parisc/asmregs.h                 | 183 -----
 include/asm-parisc/assembly.h                | 519 --------------
 include/asm-parisc/atomic.h                  | 348 ----------
 include/asm-parisc/auxvec.h                  |   4 -
 include/asm-parisc/bitops.h                  | 239 -------
 include/asm-parisc/bug.h                     |  92 ---
 include/asm-parisc/bugs.h                    |  19 -
 include/asm-parisc/byteorder.h               |  82 ---
 include/asm-parisc/cache.h                   |  60 --
 include/asm-parisc/cacheflush.h              | 121 ----
 include/asm-parisc/checksum.h                | 210 ------
 include/asm-parisc/compat.h                  | 165 -----
 include/asm-parisc/compat_rt_sigframe.h      |  50 --
 include/asm-parisc/compat_signal.h           |   2 -
 include/asm-parisc/compat_ucontext.h         |  17 -
 include/asm-parisc/cputime.h                 |   6 -
 include/asm-parisc/current.h                 |  15 -
 include/asm-parisc/delay.h                   |  43 --
 include/asm-parisc/device.h                  |   7 -
 include/asm-parisc/div64.h                   |   1 -
 include/asm-parisc/dma-mapping.h             | 253 -------
 include/asm-parisc/dma.h                     | 186 -----
 include/asm-parisc/eisa_bus.h                |  23 -
 include/asm-parisc/eisa_eeprom.h             | 153 -----
 include/asm-parisc/elf.h                     | 342 ---------
 include/asm-parisc/emergency-restart.h       |   6 -
 include/asm-parisc/errno.h                   | 124 ----
 include/asm-parisc/fb.h                      |  19 -
 include/asm-parisc/fcntl.h                   |  39 --
 include/asm-parisc/fixmap.h                  |  30 -
 include/asm-parisc/floppy.h                  | 271 --------
 include/asm-parisc/futex.h                   |  77 ---
 include/asm-parisc/grfioctl.h                | 113 ---
 include/asm-parisc/hardirq.h                 |  29 -
 include/asm-parisc/hardware.h                | 127 ----
 include/asm-parisc/hw_irq.h                  |   8 -
 include/asm-parisc/ide.h                     |  61 --
 include/asm-parisc/io.h                      | 293 --------
 include/asm-parisc/ioctl.h                   |  44 --
 include/asm-parisc/ioctls.h                  |  90 ---
 include/asm-parisc/ipcbuf.h                  |  27 -
 include/asm-parisc/irq.h                     |  57 --
 include/asm-parisc/irq_regs.h                |   1 -
 include/asm-parisc/kdebug.h                  |   1 -
 include/asm-parisc/kmap_types.h              |  30 -
 include/asm-parisc/led.h                     |  42 --
 include/asm-parisc/linkage.h                 |  31 -
 include/asm-parisc/local.h                   |   1 -
 include/asm-parisc/machdep.h                 |  16 -
 include/asm-parisc/mc146818rtc.h             |   9 -
 include/asm-parisc/mckinley.h                |   9 -
 include/asm-parisc/mman.h                    |  61 --
 include/asm-parisc/mmu.h                     |   7 -
 include/asm-parisc/mmu_context.h             |  75 --
 include/asm-parisc/mmzone.h                  |  73 --
 include/asm-parisc/module.h                  |  32 -
 include/asm-parisc/msgbuf.h                  |  37 -
 include/asm-parisc/mutex.h                   |   9 -
 include/asm-parisc/page.h                    | 173 -----
 include/asm-parisc/param.h                   |  22 -
 include/asm-parisc/parisc-device.h           |  64 --
 include/asm-parisc/parport.h                 |  18 -
 include/asm-parisc/pci.h                     | 294 --------
 include/asm-parisc/pdc.h                     | 760 --------------------
 include/asm-parisc/pdc_chassis.h             | 381 ----------
 include/asm-parisc/pdcpat.h                  | 308 ---------
 include/asm-parisc/percpu.h                  |   7 -
 include/asm-parisc/perf.h                    |  74 --
 include/asm-parisc/pgalloc.h                 | 149 ----
 include/asm-parisc/pgtable.h                 | 508 --------------
 include/asm-parisc/poll.h                    |   1 -
 include/asm-parisc/posix_types.h             | 129 ----
 include/asm-parisc/prefetch.h                |  39 --
 include/asm-parisc/processor.h               | 357 ----------
 include/asm-parisc/psw.h                     |  62 --
 include/asm-parisc/ptrace.h                  |  58 --
 include/asm-parisc/real.h                    |   5 -
 include/asm-parisc/resource.h                |   7 -
 include/asm-parisc/ropes.h                   | 322 ---------
 include/asm-parisc/rt_sigframe.h             |  23 -
 include/asm-parisc/rtc.h                     | 131 ----
 include/asm-parisc/runway.h                  |  12 -
 include/asm-parisc/scatterlist.h             |  27 -
 include/asm-parisc/sections.h                |  12 -
 include/asm-parisc/segment.h                 |   6 -
 include/asm-parisc/sembuf.h                  |  29 -
 include/asm-parisc/serial.h                  |  10 -
 include/asm-parisc/setup.h                   |   6 -
 include/asm-parisc/shmbuf.h                  |  58 --
 include/asm-parisc/shmparam.h                |   8 -
 include/asm-parisc/sigcontext.h              |  20 -
 include/asm-parisc/siginfo.h                 |  14 -
 include/asm-parisc/signal.h                  | 153 -----
 include/asm-parisc/smp.h                     |  68 --
 include/asm-parisc/socket.h                  |  62 --
 include/asm-parisc/sockios.h                 |  13 -
 include/asm-parisc/spinlock.h                | 194 ------
 include/asm-parisc/spinlock_types.h          |  21 -
 include/asm-parisc/stat.h                    | 100 ---
 include/asm-parisc/statfs.h                  |  58 --
 include/asm-parisc/string.h                  |  10 -
 include/asm-parisc/superio.h                 |  85 ---
 include/asm-parisc/system.h                  | 182 -----
 include/asm-parisc/termbits.h                | 200 ------
 include/asm-parisc/termios.h                 |  90 ---
 include/asm-parisc/thread_info.h             |  74 --
 include/asm-parisc/timex.h                   |  20 -
 include/asm-parisc/tlb.h                     |  27 -
 include/asm-parisc/tlbflush.h                |  80 ---
 include/asm-parisc/topology.h                |   6 -
 include/asm-parisc/traps.h                   |  16 -
 include/asm-parisc/types.h                   |  36 -
 include/asm-parisc/uaccess.h                 | 244 -------
 include/asm-parisc/ucontext.h                |  12 -
 include/asm-parisc/unaligned.h               |  16 -
 include/asm-parisc/unistd.h                  | 991 ---------------------------
 include/asm-parisc/unwind.h                  |  77 ---
 include/asm-parisc/user.h                    |   5 -
 include/asm-parisc/vga.h                     |   6 -
 include/asm-parisc/xor.h                     |   1 -
 246 files changed, 11977 insertions(+), 11977 deletions(-)
 create mode 100644 arch/parisc/include/asm/Kbuild
 create mode 100644 arch/parisc/include/asm/a.out.h
 create mode 100644 arch/parisc/include/asm/agp.h
 create mode 100644 arch/parisc/include/asm/asmregs.h
 create mode 100644 arch/parisc/include/asm/assembly.h
 create mode 100644 arch/parisc/include/asm/atomic.h
 create mode 100644 arch/parisc/include/asm/auxvec.h
 create mode 100644 arch/parisc/include/asm/bitops.h
 create mode 100644 arch/parisc/include/asm/bug.h
 create mode 100644 arch/parisc/include/asm/bugs.h
 create mode 100644 arch/parisc/include/asm/byteorder.h
 create mode 100644 arch/parisc/include/asm/cache.h
 create mode 100644 arch/parisc/include/asm/cacheflush.h
 create mode 100644 arch/parisc/include/asm/checksum.h
 create mode 100644 arch/parisc/include/asm/compat.h
 create mode 100644 arch/parisc/include/asm/compat_rt_sigframe.h
 create mode 100644 arch/parisc/include/asm/compat_signal.h
 create mode 100644 arch/parisc/include/asm/compat_ucontext.h
 create mode 100644 arch/parisc/include/asm/cputime.h
 create mode 100644 arch/parisc/include/asm/current.h
 create mode 100644 arch/parisc/include/asm/delay.h
 create mode 100644 arch/parisc/include/asm/device.h
 create mode 100644 arch/parisc/include/asm/div64.h
 create mode 100644 arch/parisc/include/asm/dma-mapping.h
 create mode 100644 arch/parisc/include/asm/dma.h
 create mode 100644 arch/parisc/include/asm/eisa_bus.h
 create mode 100644 arch/parisc/include/asm/eisa_eeprom.h
 create mode 100644 arch/parisc/include/asm/elf.h
 create mode 100644 arch/parisc/include/asm/emergency-restart.h
 create mode 100644 arch/parisc/include/asm/errno.h
 create mode 100644 arch/parisc/include/asm/fb.h
 create mode 100644 arch/parisc/include/asm/fcntl.h
 create mode 100644 arch/parisc/include/asm/fixmap.h
 create mode 100644 arch/parisc/include/asm/floppy.h
 create mode 100644 arch/parisc/include/asm/futex.h
 create mode 100644 arch/parisc/include/asm/grfioctl.h
 create mode 100644 arch/parisc/include/asm/hardirq.h
 create mode 100644 arch/parisc/include/asm/hardware.h
 create mode 100644 arch/parisc/include/asm/hw_irq.h
 create mode 100644 arch/parisc/include/asm/ide.h
 create mode 100644 arch/parisc/include/asm/io.h
 create mode 100644 arch/parisc/include/asm/ioctl.h
 create mode 100644 arch/parisc/include/asm/ioctls.h
 create mode 100644 arch/parisc/include/asm/ipcbuf.h
 create mode 100644 arch/parisc/include/asm/irq.h
 create mode 100644 arch/parisc/include/asm/irq_regs.h
 create mode 100644 arch/parisc/include/asm/kdebug.h
 create mode 100644 arch/parisc/include/asm/kmap_types.h
 create mode 100644 arch/parisc/include/asm/led.h
 create mode 100644 arch/parisc/include/asm/linkage.h
 create mode 100644 arch/parisc/include/asm/local.h
 create mode 100644 arch/parisc/include/asm/machdep.h
 create mode 100644 arch/parisc/include/asm/mc146818rtc.h
 create mode 100644 arch/parisc/include/asm/mckinley.h
 create mode 100644 arch/parisc/include/asm/mman.h
 create mode 100644 arch/parisc/include/asm/mmu.h
 create mode 100644 arch/parisc/include/asm/mmu_context.h
 create mode 100644 arch/parisc/include/asm/mmzone.h
 create mode 100644 arch/parisc/include/asm/module.h
 create mode 100644 arch/parisc/include/asm/msgbuf.h
 create mode 100644 arch/parisc/include/asm/mutex.h
 create mode 100644 arch/parisc/include/asm/page.h
 create mode 100644 arch/parisc/include/asm/param.h
 create mode 100644 arch/parisc/include/asm/parisc-device.h
 create mode 100644 arch/parisc/include/asm/parport.h
 create mode 100644 arch/parisc/include/asm/pci.h
 create mode 100644 arch/parisc/include/asm/pdc.h
 create mode 100644 arch/parisc/include/asm/pdc_chassis.h
 create mode 100644 arch/parisc/include/asm/pdcpat.h
 create mode 100644 arch/parisc/include/asm/percpu.h
 create mode 100644 arch/parisc/include/asm/perf.h
 create mode 100644 arch/parisc/include/asm/pgalloc.h
 create mode 100644 arch/parisc/include/asm/pgtable.h
 create mode 100644 arch/parisc/include/asm/poll.h
 create mode 100644 arch/parisc/include/asm/posix_types.h
 create mode 100644 arch/parisc/include/asm/prefetch.h
 create mode 100644 arch/parisc/include/asm/processor.h
 create mode 100644 arch/parisc/include/asm/psw.h
 create mode 100644 arch/parisc/include/asm/ptrace.h
 create mode 100644 arch/parisc/include/asm/real.h
 create mode 100644 arch/parisc/include/asm/resource.h
 create mode 100644 arch/parisc/include/asm/ropes.h
 create mode 100644 arch/parisc/include/asm/rt_sigframe.h
 create mode 100644 arch/parisc/include/asm/rtc.h
 create mode 100644 arch/parisc/include/asm/runway.h
 create mode 100644 arch/parisc/include/asm/scatterlist.h
 create mode 100644 arch/parisc/include/asm/sections.h
 create mode 100644 arch/parisc/include/asm/segment.h
 create mode 100644 arch/parisc/include/asm/sembuf.h
 create mode 100644 arch/parisc/include/asm/serial.h
 create mode 100644 arch/parisc/include/asm/setup.h
 create mode 100644 arch/parisc/include/asm/shmbuf.h
 create mode 100644 arch/parisc/include/asm/shmparam.h
 create mode 100644 arch/parisc/include/asm/sigcontext.h
 create mode 100644 arch/parisc/include/asm/siginfo.h
 create mode 100644 arch/parisc/include/asm/signal.h
 create mode 100644 arch/parisc/include/asm/smp.h
 create mode 100644 arch/parisc/include/asm/socket.h
 create mode 100644 arch/parisc/include/asm/sockios.h
 create mode 100644 arch/parisc/include/asm/spinlock.h
 create mode 100644 arch/parisc/include/asm/spinlock_types.h
 create mode 100644 arch/parisc/include/asm/stat.h
 create mode 100644 arch/parisc/include/asm/statfs.h
 create mode 100644 arch/parisc/include/asm/string.h
 create mode 100644 arch/parisc/include/asm/superio.h
 create mode 100644 arch/parisc/include/asm/system.h
 create mode 100644 arch/parisc/include/asm/termbits.h
 create mode 100644 arch/parisc/include/asm/termios.h
 create mode 100644 arch/parisc/include/asm/thread_info.h
 create mode 100644 arch/parisc/include/asm/timex.h
 create mode 100644 arch/parisc/include/asm/tlb.h
 create mode 100644 arch/parisc/include/asm/tlbflush.h
 create mode 100644 arch/parisc/include/asm/topology.h
 create mode 100644 arch/parisc/include/asm/traps.h
 create mode 100644 arch/parisc/include/asm/types.h
 create mode 100644 arch/parisc/include/asm/uaccess.h
 create mode 100644 arch/parisc/include/asm/ucontext.h
 create mode 100644 arch/parisc/include/asm/unaligned.h
 create mode 100644 arch/parisc/include/asm/unistd.h
 create mode 100644 arch/parisc/include/asm/unwind.h
 create mode 100644 arch/parisc/include/asm/user.h
 create mode 100644 arch/parisc/include/asm/vga.h
 create mode 100644 arch/parisc/include/asm/xor.h
 delete mode 100644 include/asm-parisc/Kbuild
 delete mode 100644 include/asm-parisc/a.out.h
 delete mode 100644 include/asm-parisc/agp.h
 delete mode 100644 include/asm-parisc/asmregs.h
 delete mode 100644 include/asm-parisc/assembly.h
 delete mode 100644 include/asm-parisc/atomic.h
 delete mode 100644 include/asm-parisc/auxvec.h
 delete mode 100644 include/asm-parisc/bitops.h
 delete mode 100644 include/asm-parisc/bug.h
 delete mode 100644 include/asm-parisc/bugs.h
 delete mode 100644 include/asm-parisc/byteorder.h
 delete mode 100644 include/asm-parisc/cache.h
 delete mode 100644 include/asm-parisc/cacheflush.h
 delete mode 100644 include/asm-parisc/checksum.h
 delete mode 100644 include/asm-parisc/compat.h
 delete mode 100644 include/asm-parisc/compat_rt_sigframe.h
 delete mode 100644 include/asm-parisc/compat_signal.h
 delete mode 100644 include/asm-parisc/compat_ucontext.h
 delete mode 100644 include/asm-parisc/cputime.h
 delete mode 100644 include/asm-parisc/current.h
 delete mode 100644 include/asm-parisc/delay.h
 delete mode 100644 include/asm-parisc/device.h
 delete mode 100644 include/asm-parisc/div64.h
 delete mode 100644 include/asm-parisc/dma-mapping.h
 delete mode 100644 include/asm-parisc/dma.h
 delete mode 100644 include/asm-parisc/eisa_bus.h
 delete mode 100644 include/asm-parisc/eisa_eeprom.h
 delete mode 100644 include/asm-parisc/elf.h
 delete mode 100644 include/asm-parisc/emergency-restart.h
 delete mode 100644 include/asm-parisc/errno.h
 delete mode 100644 include/asm-parisc/fb.h
 delete mode 100644 include/asm-parisc/fcntl.h
 delete mode 100644 include/asm-parisc/fixmap.h
 delete mode 100644 include/asm-parisc/floppy.h
 delete mode 100644 include/asm-parisc/futex.h
 delete mode 100644 include/asm-parisc/grfioctl.h
 delete mode 100644 include/asm-parisc/hardirq.h
 delete mode 100644 include/asm-parisc/hardware.h
 delete mode 100644 include/asm-parisc/hw_irq.h
 delete mode 100644 include/asm-parisc/ide.h
 delete mode 100644 include/asm-parisc/io.h
 delete mode 100644 include/asm-parisc/ioctl.h
 delete mode 100644 include/asm-parisc/ioctls.h
 delete mode 100644 include/asm-parisc/ipcbuf.h
 delete mode 100644 include/asm-parisc/irq.h
 delete mode 100644 include/asm-parisc/irq_regs.h
 delete mode 100644 include/asm-parisc/kdebug.h
 delete mode 100644 include/asm-parisc/kmap_types.h
 delete mode 100644 include/asm-parisc/led.h
 delete mode 100644 include/asm-parisc/linkage.h
 delete mode 100644 include/asm-parisc/local.h
 delete mode 100644 include/asm-parisc/machdep.h
 delete mode 100644 include/asm-parisc/mc146818rtc.h
 delete mode 100644 include/asm-parisc/mckinley.h
 delete mode 100644 include/asm-parisc/mman.h
 delete mode 100644 include/asm-parisc/mmu.h
 delete mode 100644 include/asm-parisc/mmu_context.h
 delete mode 100644 include/asm-parisc/mmzone.h
 delete mode 100644 include/asm-parisc/module.h
 delete mode 100644 include/asm-parisc/msgbuf.h
 delete mode 100644 include/asm-parisc/mutex.h
 delete mode 100644 include/asm-parisc/page.h
 delete mode 100644 include/asm-parisc/param.h
 delete mode 100644 include/asm-parisc/parisc-device.h
 delete mode 100644 include/asm-parisc/parport.h
 delete mode 100644 include/asm-parisc/pci.h
 delete mode 100644 include/asm-parisc/pdc.h
 delete mode 100644 include/asm-parisc/pdc_chassis.h
 delete mode 100644 include/asm-parisc/pdcpat.h
 delete mode 100644 include/asm-parisc/percpu.h
 delete mode 100644 include/asm-parisc/perf.h
 delete mode 100644 include/asm-parisc/pgalloc.h
 delete mode 100644 include/asm-parisc/pgtable.h
 delete mode 100644 include/asm-parisc/poll.h
 delete mode 100644 include/asm-parisc/posix_types.h
 delete mode 100644 include/asm-parisc/prefetch.h
 delete mode 100644 include/asm-parisc/processor.h
 delete mode 100644 include/asm-parisc/psw.h
 delete mode 100644 include/asm-parisc/ptrace.h
 delete mode 100644 include/asm-parisc/real.h
 delete mode 100644 include/asm-parisc/resource.h
 delete mode 100644 include/asm-parisc/ropes.h
 delete mode 100644 include/asm-parisc/rt_sigframe.h
 delete mode 100644 include/asm-parisc/rtc.h
 delete mode 100644 include/asm-parisc/runway.h
 delete mode 100644 include/asm-parisc/scatterlist.h
 delete mode 100644 include/asm-parisc/sections.h
 delete mode 100644 include/asm-parisc/segment.h
 delete mode 100644 include/asm-parisc/sembuf.h
 delete mode 100644 include/asm-parisc/serial.h
 delete mode 100644 include/asm-parisc/setup.h
 delete mode 100644 include/asm-parisc/shmbuf.h
 delete mode 100644 include/asm-parisc/shmparam.h
 delete mode 100644 include/asm-parisc/sigcontext.h
 delete mode 100644 include/asm-parisc/siginfo.h
 delete mode 100644 include/asm-parisc/signal.h
 delete mode 100644 include/asm-parisc/smp.h
 delete mode 100644 include/asm-parisc/socket.h
 delete mode 100644 include/asm-parisc/sockios.h
 delete mode 100644 include/asm-parisc/spinlock.h
 delete mode 100644 include/asm-parisc/spinlock_types.h
 delete mode 100644 include/asm-parisc/stat.h
 delete mode 100644 include/asm-parisc/statfs.h
 delete mode 100644 include/asm-parisc/string.h
 delete mode 100644 include/asm-parisc/superio.h
 delete mode 100644 include/asm-parisc/system.h
 delete mode 100644 include/asm-parisc/termbits.h
 delete mode 100644 include/asm-parisc/termios.h
 delete mode 100644 include/asm-parisc/thread_info.h
 delete mode 100644 include/asm-parisc/timex.h
 delete mode 100644 include/asm-parisc/tlb.h
 delete mode 100644 include/asm-parisc/tlbflush.h
 delete mode 100644 include/asm-parisc/topology.h
 delete mode 100644 include/asm-parisc/traps.h
 delete mode 100644 include/asm-parisc/types.h
 delete mode 100644 include/asm-parisc/uaccess.h
 delete mode 100644 include/asm-parisc/ucontext.h
 delete mode 100644 include/asm-parisc/unaligned.h
 delete mode 100644 include/asm-parisc/unistd.h
 delete mode 100644 include/asm-parisc/unwind.h
 delete mode 100644 include/asm-parisc/user.h
 delete mode 100644 include/asm-parisc/vga.h
 delete mode 100644 include/asm-parisc/xor.h

diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
new file mode 100644
index 000000000000..f88b252e419c
--- /dev/null
+++ b/arch/parisc/include/asm/Kbuild
@@ -0,0 +1,3 @@
+include include/asm-generic/Kbuild.asm
+
+unifdef-y += pdc.h
diff --git a/arch/parisc/include/asm/a.out.h b/arch/parisc/include/asm/a.out.h
new file mode 100644
index 000000000000..eb04e34c5bb1
--- /dev/null
+++ b/arch/parisc/include/asm/a.out.h
@@ -0,0 +1,20 @@
+#ifndef __PARISC_A_OUT_H__
+#define __PARISC_A_OUT_H__
+
+struct exec
+{
+  unsigned int a_info;		/* Use macros N_MAGIC, etc for access */
+  unsigned a_text;		/* length of text, in bytes */
+  unsigned a_data;		/* length of data, in bytes */
+  unsigned a_bss;		/* length of uninitialized data area for file, in bytes */
+  unsigned a_syms;		/* length of symbol table data in file, in bytes */
+  unsigned a_entry;		/* start address */
+  unsigned a_trsize;		/* length of relocation info for text, in bytes */
+  unsigned a_drsize;		/* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a)	((a).a_trsize)
+#define N_DRSIZE(a)	((a).a_drsize)
+#define N_SYMSIZE(a)	((a).a_syms)
+
+#endif /* __A_OUT_GNU_H__ */
diff --git a/arch/parisc/include/asm/agp.h b/arch/parisc/include/asm/agp.h
new file mode 100644
index 000000000000..9651660da639
--- /dev/null
+++ b/arch/parisc/include/asm/agp.h
@@ -0,0 +1,24 @@
+#ifndef _ASM_PARISC_AGP_H
+#define _ASM_PARISC_AGP_H
+
+/*
+ * PARISC specific AGP definitions.
+ * Copyright (c) 2006 Kyle McMartin <kyle@parisc-linux.org>
+ *
+ */
+
+#define map_page_into_agp(page)		/* nothing */
+#define unmap_page_from_agp(page)	/* nothing */
+#define flush_agp_cache()		mb()
+
+/* Convert a physical address to an address suitable for the GART. */
+#define phys_to_gart(x) (x)
+#define gart_to_phys(x) (x)
+
+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
+#define alloc_gatt_pages(order)		\
+	((char *)__get_free_pages(GFP_KERNEL, (order)))
+#define free_gatt_pages(table, order)	\
+	free_pages((unsigned long)(table), (order))
+
+#endif /* _ASM_PARISC_AGP_H */
diff --git a/arch/parisc/include/asm/asmregs.h b/arch/parisc/include/asm/asmregs.h
new file mode 100644
index 000000000000..d93c646e1887
--- /dev/null
+++ b/arch/parisc/include/asm/asmregs.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License as published by
+ *	the Free Software Foundation; either version 2, or (at your option)
+ *	any later version.
+ *
+ *	This program is distributed in the hope that it will be useful,
+ *	but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *	GNU General Public License for more details.
+ *
+ *	You should have received a copy of the GNU General Public License
+ *	along with this program; if not, write to the Free Software
+ *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PARISC_ASMREGS_H
+#define _PARISC_ASMREGS_H
+
+;! General Registers
+
+rp:	.reg	%r2
+arg3:	.reg	%r23
+arg2:	.reg	%r24
+arg1:	.reg	%r25
+arg0:	.reg	%r26
+dp:	.reg	%r27
+ret0:	.reg	%r28
+ret1:	.reg	%r29
+sl:	.reg	%r29
+sp:	.reg	%r30
+
+#if 0
+/* PA20_REVISIT */
+arg7:	.reg	r19
+arg6:	.reg	r20
+arg5:	.reg	r21
+arg4:	.reg	r22
+gp:	.reg	r27
+ap:	.reg	r29
+#endif
+
+
+r0:	.reg	%r0
+r1:	.reg	%r1
+r2:	.reg	%r2
+r3:	.reg	%r3
+r4:	.reg	%r4
+r5:	.reg	%r5
+r6:	.reg	%r6
+r7:	.reg	%r7
+r8:	.reg	%r8
+r9:	.reg	%r9
+r10:	.reg	%r10
+r11:	.reg	%r11
+r12:	.reg	%r12
+r13:	.reg	%r13
+r14:	.reg	%r14
+r15:	.reg	%r15
+r16:	.reg	%r16
+r17:	.reg	%r17
+r18:	.reg	%r18
+r19:	.reg	%r19
+r20:	.reg	%r20
+r21:	.reg	%r21
+r22:	.reg	%r22
+r23:	.reg	%r23
+r24:	.reg	%r24
+r25:	.reg	%r25
+r26:	.reg	%r26
+r27:	.reg	%r27
+r28:	.reg	%r28
+r29:	.reg	%r29
+r30:	.reg	%r30
+r31:	.reg	%r31
+
+
+;! Space Registers
+
+sr0:	.reg	%sr0
+sr1:	.reg	%sr1
+sr2:	.reg	%sr2
+sr3:	.reg	%sr3
+sr4:	.reg	%sr4
+sr5:	.reg	%sr5
+sr6:	.reg	%sr6
+sr7:	.reg	%sr7
+
+
+;! Floating Point Registers
+
+fr0:	.reg	%fr0
+fr1:	.reg	%fr1
+fr2:	.reg	%fr2
+fr3:	.reg	%fr3
+fr4:	.reg	%fr4
+fr5:	.reg	%fr5
+fr6:	.reg	%fr6
+fr7:	.reg	%fr7
+fr8:	.reg	%fr8
+fr9:	.reg	%fr9
+fr10:	.reg	%fr10
+fr11:	.reg	%fr11
+fr12:	.reg	%fr12
+fr13:	.reg	%fr13
+fr14:	.reg	%fr14
+fr15:	.reg	%fr15
+fr16:	.reg	%fr16
+fr17:	.reg	%fr17
+fr18:	.reg	%fr18
+fr19:	.reg	%fr19
+fr20:	.reg	%fr20
+fr21:	.reg	%fr21
+fr22:	.reg	%fr22
+fr23:	.reg	%fr23
+fr24:	.reg	%fr24
+fr25:	.reg	%fr25
+fr26:	.reg	%fr26
+fr27:	.reg	%fr27
+fr28:	.reg	%fr28
+fr29:	.reg	%fr29
+fr30:	.reg	%fr30
+fr31:	.reg	%fr31
+
+
+;! Control Registers
+
+rctr:	.reg	%cr0
+pidr1:	.reg	%cr8
+pidr2:	.reg	%cr9
+ccr:	.reg	%cr10
+sar:	.reg	%cr11
+pidr3:	.reg	%cr12
+pidr4:	.reg	%cr13
+iva:	.reg	%cr14
+eiem:	.reg	%cr15
+itmr:	.reg	%cr16
+pcsq:	.reg	%cr17
+pcoq:	.reg	%cr18
+iir:	.reg	%cr19
+isr:	.reg	%cr20
+ior:	.reg	%cr21
+ipsw:	.reg	%cr22
+eirr:	.reg	%cr23
+tr0:	.reg	%cr24
+tr1:	.reg	%cr25
+tr2:	.reg	%cr26
+tr3:	.reg	%cr27
+tr4:	.reg	%cr28
+tr5:	.reg	%cr29
+tr6:	.reg	%cr30
+tr7:	.reg	%cr31
+
+
+cr0:	.reg	%cr0
+cr8:	.reg	%cr8
+cr9:	.reg	%cr9
+cr10:	.reg	%cr10
+cr11:	.reg	%cr11
+cr12:	.reg	%cr12
+cr13:	.reg	%cr13
+cr14:	.reg	%cr14
+cr15:	.reg	%cr15
+cr16:	.reg	%cr16
+cr17:	.reg	%cr17
+cr18:	.reg	%cr18
+cr19:	.reg	%cr19
+cr20:	.reg	%cr20
+cr21:	.reg	%cr21
+cr22:	.reg	%cr22
+cr23:	.reg	%cr23
+cr24:	.reg	%cr24
+cr25:	.reg	%cr25
+cr26:	.reg	%cr26
+cr27:	.reg	%cr27
+cr28:	.reg	%cr28
+cr29:	.reg	%cr29
+cr30:	.reg	%cr30
+cr31:	.reg	%cr31
+
+#endif
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
new file mode 100644
index 000000000000..ffb208840ecc
--- /dev/null
+++ b/arch/parisc/include/asm/assembly.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
+ * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
+ * Copyright (C) 1999 SuSE GmbH
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2, or (at your option)
+ *    any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PARISC_ASSEMBLY_H
+#define _PARISC_ASSEMBLY_H
+
+#define CALLEE_FLOAT_FRAME_SIZE	80
+
+#ifdef CONFIG_64BIT
+#define LDREG	ldd
+#define STREG	std
+#define LDREGX  ldd,s
+#define LDREGM	ldd,mb
+#define STREGM	std,ma
+#define SHRREG	shrd
+#define SHLREG	shld
+#define ANDCM   andcm,*
+#define	COND(x)	* ## x
+#define RP_OFFSET	16
+#define FRAME_SIZE	128
+#define CALLEE_REG_FRAME_SIZE	144
+#define ASM_ULONG_INSN	.dword
+#else	/* CONFIG_64BIT */
+#define LDREG	ldw
+#define STREG	stw
+#define LDREGX  ldwx,s
+#define LDREGM	ldwm
+#define STREGM	stwm
+#define SHRREG	shr
+#define SHLREG	shlw
+#define ANDCM   andcm
+#define COND(x)	x
+#define RP_OFFSET	20
+#define FRAME_SIZE	64
+#define CALLEE_REG_FRAME_SIZE	128
+#define ASM_ULONG_INSN	.word
+#endif
+
+#define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
+
+#ifdef CONFIG_PA20
+#define LDCW		ldcw,co
+#define BL		b,l
+# ifdef CONFIG_64BIT
+#  define LEVEL		2.0w
+# else
+#  define LEVEL		2.0
+# endif
+#else
+#define LDCW		ldcw
+#define BL		bl
+#define LEVEL		1.1
+#endif
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_64BIT
+/* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
+ * work around that for now... */
+	.level 2.0w
+#endif
+
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+
+#include <asm/asmregs.h>
+
+	sp	=	30
+	gp	=	27
+	ipsw	=	22
+
+	/*
+	 * We provide two versions of each macro to convert from physical
+	 * to virtual and vice versa. The "_r1" versions take one argument
+	 * register, but trashes r1 to do the conversion. The other
+	 * version takes two arguments: a src and destination register.
+	 * However, the source and destination registers can not be
+	 * the same register.
+	 */
+
+	.macro  tophys  grvirt, grphys
+	ldil    L%(__PAGE_OFFSET), \grphys
+	sub     \grvirt, \grphys, \grphys
+	.endm
+	
+	.macro  tovirt  grphys, grvirt
+	ldil    L%(__PAGE_OFFSET), \grvirt
+	add     \grphys, \grvirt, \grvirt
+	.endm
+
+	.macro  tophys_r1  gr
+	ldil    L%(__PAGE_OFFSET), %r1
+	sub     \gr, %r1, \gr
+	.endm
+	
+	.macro  tovirt_r1  gr
+	ldil    L%(__PAGE_OFFSET), %r1
+	add     \gr, %r1, \gr
+	.endm
+
+	.macro delay value
+	ldil	L%\value, 1
+	ldo	R%\value(1), 1
+	addib,UV,n -1,1,.
+	addib,NUV,n -1,1,.+8
+	nop
+	.endm
+
+	.macro	debug value
+	.endm
+
+
+	/* Shift Left - note the r and t can NOT be the same! */
+	.macro shl r, sa, t
+	dep,z	\r, 31-\sa, 32-\sa, \t
+	.endm
+
+	/* The PA 2.0 shift left */
+	.macro shlw r, sa, t
+	depw,z	\r, 31-\sa, 32-\sa, \t
+	.endm
+
+	/* And the PA 2.0W shift left */
+	.macro shld r, sa, t
+	depd,z	\r, 63-\sa, 64-\sa, \t
+	.endm
+
+	/* Shift Right - note the r and t can NOT be the same! */
+	.macro shr r, sa, t
+	extru \r, 31-\sa, 32-\sa, \t
+	.endm
+
+	/* pa20w version of shift right */
+	.macro shrd r, sa, t
+	extrd,u \r, 63-\sa, 64-\sa, \t
+	.endm
+
+	/* load 32-bit 'value' into 'reg' compensating for the ldil
+	 * sign-extension when running in wide mode.
+	 * WARNING!! neither 'value' nor 'reg' can be expressions
+	 * containing '.'!!!! */
+	.macro	load32 value, reg
+	ldil	L%\value, \reg
+	ldo	R%\value(\reg), \reg
+	.endm
+
+	.macro loadgp
+#ifdef CONFIG_64BIT
+	ldil		L%__gp, %r27
+	ldo		R%__gp(%r27), %r27
+#else
+	ldil		L%$global$, %r27
+	ldo		R%$global$(%r27), %r27
+#endif
+	.endm
+
+#define SAVE_SP(r, where) mfsp r, %r1 ! STREG %r1, where
+#define REST_SP(r, where) LDREG where, %r1 ! mtsp %r1, r
+#define SAVE_CR(r, where) mfctl r, %r1 ! STREG %r1, where
+#define REST_CR(r, where) LDREG where, %r1 ! mtctl %r1, r
+
+	.macro	save_general	regs
+	STREG %r1, PT_GR1 (\regs)
+	STREG %r2, PT_GR2 (\regs)
+	STREG %r3, PT_GR3 (\regs)
+	STREG %r4, PT_GR4 (\regs)
+	STREG %r5, PT_GR5 (\regs)
+	STREG %r6, PT_GR6 (\regs)
+	STREG %r7, PT_GR7 (\regs)
+	STREG %r8, PT_GR8 (\regs)
+	STREG %r9, PT_GR9 (\regs)
+	STREG %r10, PT_GR10(\regs)
+	STREG %r11, PT_GR11(\regs)
+	STREG %r12, PT_GR12(\regs)
+	STREG %r13, PT_GR13(\regs)
+	STREG %r14, PT_GR14(\regs)
+	STREG %r15, PT_GR15(\regs)
+	STREG %r16, PT_GR16(\regs)
+	STREG %r17, PT_GR17(\regs)
+	STREG %r18, PT_GR18(\regs)
+	STREG %r19, PT_GR19(\regs)
+	STREG %r20, PT_GR20(\regs)
+	STREG %r21, PT_GR21(\regs)
+	STREG %r22, PT_GR22(\regs)
+	STREG %r23, PT_GR23(\regs)
+	STREG %r24, PT_GR24(\regs)
+	STREG %r25, PT_GR25(\regs)
+	/* r26 is saved in get_stack and used to preserve a value across virt_map */
+	STREG %r27, PT_GR27(\regs)
+	STREG %r28, PT_GR28(\regs)
+	/* r29 is saved in get_stack and used to point to saved registers */
+	/* r30 stack pointer saved in get_stack */
+	STREG %r31, PT_GR31(\regs)
+	.endm
+
+	.macro	rest_general	regs
+	/* r1 used as a temp in rest_stack and is restored there */
+	LDREG PT_GR2 (\regs), %r2
+	LDREG PT_GR3 (\regs), %r3
+	LDREG PT_GR4 (\regs), %r4
+	LDREG PT_GR5 (\regs), %r5
+	LDREG PT_GR6 (\regs), %r6
+	LDREG PT_GR7 (\regs), %r7
+	LDREG PT_GR8 (\regs), %r8
+	LDREG PT_GR9 (\regs), %r9
+	LDREG PT_GR10(\regs), %r10
+	LDREG PT_GR11(\regs), %r11
+	LDREG PT_GR12(\regs), %r12
+	LDREG PT_GR13(\regs), %r13
+	LDREG PT_GR14(\regs), %r14
+	LDREG PT_GR15(\regs), %r15
+	LDREG PT_GR16(\regs), %r16
+	LDREG PT_GR17(\regs), %r17
+	LDREG PT_GR18(\regs), %r18
+	LDREG PT_GR19(\regs), %r19
+	LDREG PT_GR20(\regs), %r20
+	LDREG PT_GR21(\regs), %r21
+	LDREG PT_GR22(\regs), %r22
+	LDREG PT_GR23(\regs), %r23
+	LDREG PT_GR24(\regs), %r24
+	LDREG PT_GR25(\regs), %r25
+	LDREG PT_GR26(\regs), %r26
+	LDREG PT_GR27(\regs), %r27
+	LDREG PT_GR28(\regs), %r28
+	/* r29 points to register save area, and is restored in rest_stack */
+	/* r30 stack pointer restored in rest_stack */
+	LDREG PT_GR31(\regs), %r31
+	.endm
+
+	.macro	save_fp 	regs
+	fstd,ma  %fr0, 8(\regs)
+	fstd,ma	 %fr1, 8(\regs)
+	fstd,ma	 %fr2, 8(\regs)
+	fstd,ma	 %fr3, 8(\regs)
+	fstd,ma	 %fr4, 8(\regs)
+	fstd,ma	 %fr5, 8(\regs)
+	fstd,ma	 %fr6, 8(\regs)
+	fstd,ma	 %fr7, 8(\regs)
+	fstd,ma	 %fr8, 8(\regs)
+	fstd,ma	 %fr9, 8(\regs)
+	fstd,ma	%fr10, 8(\regs)
+	fstd,ma	%fr11, 8(\regs)
+	fstd,ma	%fr12, 8(\regs)
+	fstd,ma	%fr13, 8(\regs)
+	fstd,ma	%fr14, 8(\regs)
+	fstd,ma	%fr15, 8(\regs)
+	fstd,ma	%fr16, 8(\regs)
+	fstd,ma	%fr17, 8(\regs)
+	fstd,ma	%fr18, 8(\regs)
+	fstd,ma	%fr19, 8(\regs)
+	fstd,ma	%fr20, 8(\regs)
+	fstd,ma	%fr21, 8(\regs)
+	fstd,ma	%fr22, 8(\regs)
+	fstd,ma	%fr23, 8(\regs)
+	fstd,ma	%fr24, 8(\regs)
+	fstd,ma	%fr25, 8(\regs)
+	fstd,ma	%fr26, 8(\regs)
+	fstd,ma	%fr27, 8(\regs)
+	fstd,ma	%fr28, 8(\regs)
+	fstd,ma	%fr29, 8(\regs)
+	fstd,ma	%fr30, 8(\regs)
+	fstd	%fr31, 0(\regs)
+	.endm
+
+	.macro	rest_fp 	regs
+	fldd	0(\regs),	 %fr31
+	fldd,mb	-8(\regs),       %fr30
+	fldd,mb	-8(\regs),       %fr29
+	fldd,mb	-8(\regs),       %fr28
+	fldd,mb	-8(\regs),       %fr27
+	fldd,mb	-8(\regs),       %fr26
+	fldd,mb	-8(\regs),       %fr25
+	fldd,mb	-8(\regs),       %fr24
+	fldd,mb	-8(\regs),       %fr23
+	fldd,mb	-8(\regs),       %fr22
+	fldd,mb	-8(\regs),       %fr21
+	fldd,mb	-8(\regs),       %fr20
+	fldd,mb	-8(\regs),       %fr19
+	fldd,mb	-8(\regs),       %fr18
+	fldd,mb	-8(\regs),       %fr17
+	fldd,mb	-8(\regs),       %fr16
+	fldd,mb	-8(\regs),       %fr15
+	fldd,mb	-8(\regs),       %fr14
+	fldd,mb	-8(\regs),       %fr13
+	fldd,mb	-8(\regs),       %fr12
+	fldd,mb	-8(\regs),       %fr11
+	fldd,mb	-8(\regs),       %fr10
+	fldd,mb	-8(\regs),       %fr9
+	fldd,mb	-8(\regs),       %fr8
+	fldd,mb	-8(\regs),       %fr7
+	fldd,mb	-8(\regs),       %fr6
+	fldd,mb	-8(\regs),       %fr5
+	fldd,mb	-8(\regs),       %fr4
+	fldd,mb	-8(\regs),       %fr3
+	fldd,mb	-8(\regs),       %fr2
+	fldd,mb	-8(\regs),       %fr1
+	fldd,mb	-8(\regs),       %fr0
+	.endm
+
+	.macro	callee_save_float
+	fstd,ma	 %fr12,	8(%r30)
+	fstd,ma	 %fr13,	8(%r30)
+	fstd,ma	 %fr14,	8(%r30)
+	fstd,ma	 %fr15,	8(%r30)
+	fstd,ma	 %fr16,	8(%r30)
+	fstd,ma	 %fr17,	8(%r30)
+	fstd,ma	 %fr18,	8(%r30)
+	fstd,ma	 %fr19,	8(%r30)
+	fstd,ma	 %fr20,	8(%r30)
+	fstd,ma	 %fr21,	8(%r30)
+	.endm
+
+	.macro	callee_rest_float
+	fldd,mb	-8(%r30),   %fr21
+	fldd,mb	-8(%r30),   %fr20
+	fldd,mb	-8(%r30),   %fr19
+	fldd,mb	-8(%r30),   %fr18
+	fldd,mb	-8(%r30),   %fr17
+	fldd,mb	-8(%r30),   %fr16
+	fldd,mb	-8(%r30),   %fr15
+	fldd,mb	-8(%r30),   %fr14
+	fldd,mb	-8(%r30),   %fr13
+	fldd,mb	-8(%r30),   %fr12
+	.endm
+
+#ifdef CONFIG_64BIT
+	.macro	callee_save
+	std,ma	  %r3,	 CALLEE_REG_FRAME_SIZE(%r30)
+	mfctl	  %cr27, %r3
+	std	  %r4,	-136(%r30)
+	std	  %r5,	-128(%r30)
+	std	  %r6,	-120(%r30)
+	std	  %r7,	-112(%r30)
+	std	  %r8,	-104(%r30)
+	std	  %r9,	 -96(%r30)
+	std	 %r10,	 -88(%r30)
+	std	 %r11,	 -80(%r30)
+	std	 %r12,	 -72(%r30)
+	std	 %r13,	 -64(%r30)
+	std	 %r14,	 -56(%r30)
+	std	 %r15,	 -48(%r30)
+	std	 %r16,	 -40(%r30)
+	std	 %r17,	 -32(%r30)
+	std	 %r18,	 -24(%r30)
+	std	  %r3,	 -16(%r30)
+	.endm
+
+	.macro	callee_rest
+	ldd	 -16(%r30),    %r3
+	ldd	 -24(%r30),   %r18
+	ldd	 -32(%r30),   %r17
+	ldd	 -40(%r30),   %r16
+	ldd	 -48(%r30),   %r15
+	ldd	 -56(%r30),   %r14
+	ldd	 -64(%r30),   %r13
+	ldd	 -72(%r30),   %r12
+	ldd	 -80(%r30),   %r11
+	ldd	 -88(%r30),   %r10
+	ldd	 -96(%r30),    %r9
+	ldd	-104(%r30),    %r8
+	ldd	-112(%r30),    %r7
+	ldd	-120(%r30),    %r6
+	ldd	-128(%r30),    %r5
+	ldd	-136(%r30),    %r4
+	mtctl	%r3, %cr27
+	ldd,mb	-CALLEE_REG_FRAME_SIZE(%r30),    %r3
+	.endm
+
+#else /* ! CONFIG_64BIT */
+
+	.macro	callee_save
+	stw,ma	 %r3,	CALLEE_REG_FRAME_SIZE(%r30)
+	mfctl	 %cr27, %r3
+	stw	 %r4,	-124(%r30)
+	stw	 %r5,	-120(%r30)
+	stw	 %r6,	-116(%r30)
+	stw	 %r7,	-112(%r30)
+	stw	 %r8,	-108(%r30)
+	stw	 %r9,	-104(%r30)
+	stw	 %r10,	-100(%r30)
+	stw	 %r11,	 -96(%r30)
+	stw	 %r12,	 -92(%r30)
+	stw	 %r13,	 -88(%r30)
+	stw	 %r14,	 -84(%r30)
+	stw	 %r15,	 -80(%r30)
+	stw	 %r16,	 -76(%r30)
+	stw	 %r17,	 -72(%r30)
+	stw	 %r18,	 -68(%r30)
+	stw	  %r3,	 -64(%r30)
+	.endm
+
+	.macro	callee_rest
+	ldw	 -64(%r30),    %r3
+	ldw	 -68(%r30),   %r18
+	ldw	 -72(%r30),   %r17
+	ldw	 -76(%r30),   %r16
+	ldw	 -80(%r30),   %r15
+	ldw	 -84(%r30),   %r14
+	ldw	 -88(%r30),   %r13
+	ldw	 -92(%r30),   %r12
+	ldw	 -96(%r30),   %r11
+	ldw	-100(%r30),   %r10
+	ldw	-104(%r30),   %r9
+	ldw	-108(%r30),   %r8
+	ldw	-112(%r30),   %r7
+	ldw	-116(%r30),   %r6
+	ldw	-120(%r30),   %r5
+	ldw	-124(%r30),   %r4
+	mtctl	%r3, %cr27
+	ldw,mb	-CALLEE_REG_FRAME_SIZE(%r30),   %r3
+	.endm
+#endif /* ! CONFIG_64BIT */
+
+	.macro	save_specials	regs
+
+	SAVE_SP  (%sr0, PT_SR0 (\regs))
+	SAVE_SP  (%sr1, PT_SR1 (\regs))
+	SAVE_SP  (%sr2, PT_SR2 (\regs))
+	SAVE_SP  (%sr3, PT_SR3 (\regs))
+	SAVE_SP  (%sr4, PT_SR4 (\regs))
+	SAVE_SP  (%sr5, PT_SR5 (\regs))
+	SAVE_SP  (%sr6, PT_SR6 (\regs))
+	SAVE_SP  (%sr7, PT_SR7 (\regs))
+
+	SAVE_CR  (%cr17, PT_IASQ0(\regs))
+	mtctl	 %r0,	%cr17
+	SAVE_CR  (%cr17, PT_IASQ1(\regs))
+
+	SAVE_CR  (%cr18, PT_IAOQ0(\regs))
+	mtctl	 %r0,	%cr18
+	SAVE_CR  (%cr18, PT_IAOQ1(\regs))
+
+#ifdef CONFIG_64BIT
+	/* cr11 (sar) is a funny one.  5 bits on PA1.1 and 6 bit on PA2.0
+	 * For PA2.0 mtsar or mtctl always write 6 bits, but mfctl only
+	 * reads 5 bits.  Use mfctl,w to read all six bits.  Otherwise
+	 * we lose the 6th bit on a save/restore over interrupt.
+	 */
+	mfctl,w  %cr11, %r1
+	STREG    %r1, PT_SAR (\regs)
+#else
+	SAVE_CR  (%cr11, PT_SAR  (\regs))
+#endif
+	SAVE_CR  (%cr19, PT_IIR  (\regs))
+
+	/*
+	 * Code immediately following this macro (in intr_save) relies
+	 * on r8 containing ipsw.
+	 */
+	mfctl    %cr22, %r8
+	STREG    %r8,   PT_PSW(\regs)
+	.endm
+
+	.macro	rest_specials	regs
+
+	REST_SP  (%sr0, PT_SR0 (\regs))
+	REST_SP  (%sr1, PT_SR1 (\regs))
+	REST_SP  (%sr2, PT_SR2 (\regs))
+	REST_SP  (%sr3, PT_SR3 (\regs))
+	REST_SP  (%sr4, PT_SR4 (\regs))
+	REST_SP  (%sr5, PT_SR5 (\regs))
+	REST_SP  (%sr6, PT_SR6 (\regs))
+	REST_SP  (%sr7, PT_SR7 (\regs))
+
+	REST_CR	(%cr17, PT_IASQ0(\regs))
+	REST_CR	(%cr17, PT_IASQ1(\regs))
+
+	REST_CR	(%cr18, PT_IAOQ0(\regs))
+	REST_CR	(%cr18, PT_IAOQ1(\regs))
+
+	REST_CR (%cr11, PT_SAR	(\regs))
+
+	REST_CR	(%cr22, PT_PSW	(\regs))
+	.endm
+
+
+	/* First step to create a "relied upon translation"
+	 * See PA 2.0 Arch. page F-4 and F-5.
+	 *
+	 * The ssm was originally necessary due to a "PCxT bug".
+	 * But someone decided it needed to be added to the architecture
+	 * and this "feature" went into rev3 of PA-RISC 1.1 Arch Manual.
+	 * It's been carried forward into PA 2.0 Arch as well. :^(
+	 *
+	 * "ssm 0,%r0" is a NOP with side effects (prefetch barrier).
+	 * rsm/ssm prevents the ifetch unit from speculatively fetching
+	 * instructions past this line in the code stream.
+	 * PA 2.0 processor will single step all insn in the same QUAD (4 insn).
+	 */
+	.macro	pcxt_ssm_bug
+	rsm	PSW_SM_I,%r0
+	nop	/* 1 */
+	nop	/* 2 */
+	nop	/* 3 */
+	nop	/* 4 */
+	nop	/* 5 */
+	nop	/* 6 */
+	nop	/* 7 */
+	.endm
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
new file mode 100644
index 000000000000..57fcc4a5ebb4
--- /dev/null
+++ b/arch/parisc/include/asm/atomic.h
@@ -0,0 +1,348 @@
+/* Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>
+ * Copyright (C) 2006 Kyle McMartin <kyle@parisc-linux.org>
+ */
+
+#ifndef _ASM_PARISC_ATOMIC_H_
+#define _ASM_PARISC_ATOMIC_H_
+
+#include <linux/types.h>
+#include <asm/system.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * And probably incredibly slow on parisc.  OTOH, we don't
+ * have to write any serious assembly.   prumpf
+ */
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+#include <asm/cache.h>		/* we use L1_CACHE_BYTES */
+
+/* Use an array of spinlocks for our atomic_ts.
+ * Hash function to index into a different SPINLOCK.
+ * Since "a" is usually an address, use one spinlock per cacheline.
+ */
+#  define ATOMIC_HASH_SIZE 4
+#  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
+
+extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
+
+/* Can't use raw_spin_lock_irq because of #include problems, so
+ * this is the substitute */
+#define _atomic_spin_lock_irqsave(l,f) do {	\
+	raw_spinlock_t *s = ATOMIC_HASH(l);		\
+	local_irq_save(f);			\
+	__raw_spin_lock(s);			\
+} while(0)
+
+#define _atomic_spin_unlock_irqrestore(l,f) do {	\
+	raw_spinlock_t *s = ATOMIC_HASH(l);			\
+	__raw_spin_unlock(s);				\
+	local_irq_restore(f);				\
+} while(0)
+
+
+#else
+#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
+#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
+#endif
+
+/* This should get optimized out since it's never called.
+** Or get a link error if xchg is used "wrong".
+*/
+extern void __xchg_called_with_bad_pointer(void);
+
+
+/* __xchg32/64 defined in arch/parisc/lib/bitops.c */
+extern unsigned long __xchg8(char, char *);
+extern unsigned long __xchg32(int, int *);
+#ifdef CONFIG_64BIT
+extern unsigned long __xchg64(unsigned long, unsigned long *);
+#endif
+
+/* optimizer better get rid of switch since size is a constant */
+static __inline__ unsigned long
+__xchg(unsigned long x, __volatile__ void * ptr, int size)
+{
+	switch(size) {
+#ifdef CONFIG_64BIT
+	case 8: return __xchg64(x,(unsigned long *) ptr);
+#endif
+	case 4: return __xchg32((int) x, (int *) ptr);
+	case 1: return __xchg8((char) x, (char *) ptr);
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+
+/*
+** REVISIT - Abandoned use of LDCW in xchg() for now:
+** o need to test sizeof(*ptr) to avoid clearing adjacent bytes
+** o and while we are at it, could CONFIG_64BIT code use LDCD too?
+**
+**	if (__builtin_constant_p(x) && (x == NULL))
+**		if (((unsigned long)p & 0xf) == 0)
+**			return __ldcw(p);
+*/
+#define xchg(ptr,x) \
+	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+
+
+#define __HAVE_ARCH_CMPXCHG	1
+
+/* bug catcher for when unsupported size is used - won't link */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+/* __cmpxchg_u32/u64 defined in arch/parisc/lib/bitops.c */
+extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_);
+extern unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new_);
+
+/* don't worry...optimizer will get rid of most of this */
+static __inline__ unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
+{
+	switch(size) {
+#ifdef CONFIG_64BIT
+	case 8: return __cmpxchg_u64((unsigned long *)ptr, old, new_);
+#endif
+	case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int) old, (unsigned int) new_);
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+#include <asm-generic/cmpxchg-local.h>
+
+static inline unsigned long __cmpxchg_local(volatile void *ptr,
+				      unsigned long old,
+				      unsigned long new_, int size)
+{
+	switch (size) {
+#ifdef CONFIG_64BIT
+	case 8:	return __cmpxchg_u64((unsigned long *)ptr, old, new_);
+#endif
+	case 4:	return __cmpxchg_u32(ptr, old, new_);
+	default:
+		return __cmpxchg_local_generic(ptr, old, new_, size);
+	}
+}
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n)				  	\
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
+			(unsigned long)(n), sizeof(*(ptr))))
+#ifdef CONFIG_64BIT
+#define cmpxchg64_local(ptr, o, n)					\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg_local((ptr), (o), (n));					\
+  })
+#else
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#endif
+
+/* Note that we need not lock read accesses - aligned word writes/reads
+ * are atomic, so a reader never sees unconsistent values.
+ *
+ * Cache-line alignment would conflict with, for example, linux/module.h
+ */
+
+typedef struct { volatile int counter; } atomic_t;
+
+/* It's possible to reduce all atomic operations to either
+ * __atomic_add_return, atomic_set and atomic_read (the latter
+ * is there only for consistency).
+ */
+
+static __inline__ int __atomic_add_return(int i, atomic_t *v)
+{
+	int ret;
+	unsigned long flags;
+	_atomic_spin_lock_irqsave(v, flags);
+
+	ret = (v->counter += i);
+
+	_atomic_spin_unlock_irqrestore(v, flags);
+	return ret;
+}
+
+static __inline__ void atomic_set(atomic_t *v, int i) 
+{
+	unsigned long flags;
+	_atomic_spin_lock_irqsave(v, flags);
+
+	v->counter = i;
+
+	_atomic_spin_unlock_irqrestore(v, flags);
+}
+
+static __inline__ int atomic_read(const atomic_t *v)
+{
+	return v->counter;
+}
+
+/* exported interface */
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define atomic_add(i,v)	((void)(__atomic_add_return( ((int)i),(v))))
+#define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)i),(v))))
+#define atomic_inc(v)	((void)(__atomic_add_return(   1,(v))))
+#define atomic_dec(v)	((void)(__atomic_add_return(  -1,(v))))
+
+#define atomic_add_return(i,v)	(__atomic_add_return( ((int)i),(v)))
+#define atomic_sub_return(i,v)	(__atomic_add_return(-((int)i),(v)))
+#define atomic_inc_return(v)	(__atomic_add_return(   1,(v)))
+#define atomic_dec_return(v)	(__atomic_add_return(  -1,(v)))
+
+#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+#define atomic_dec_and_test(v)	(atomic_dec_return(v) == 0)
+
+#define atomic_sub_and_test(i,v)	(atomic_sub_return((i),(v)) == 0)
+
+#define ATOMIC_INIT(i)	((atomic_t) { (i) })
+
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__after_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_inc()	smp_mb()
+
+#ifdef CONFIG_64BIT
+
+typedef struct { volatile s64 counter; } atomic64_t;
+
+#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
+
+static __inline__ int
+__atomic64_add_return(s64 i, atomic64_t *v)
+{
+	int ret;
+	unsigned long flags;
+	_atomic_spin_lock_irqsave(v, flags);
+
+	ret = (v->counter += i);
+
+	_atomic_spin_unlock_irqrestore(v, flags);
+	return ret;
+}
+
+static __inline__ void
+atomic64_set(atomic64_t *v, s64 i)
+{
+	unsigned long flags;
+	_atomic_spin_lock_irqsave(v, flags);
+
+	v->counter = i;
+
+	_atomic_spin_unlock_irqrestore(v, flags);
+}
+
+static __inline__ s64
+atomic64_read(const atomic64_t *v)
+{
+	return v->counter;
+}
+
+#define atomic64_add(i,v)	((void)(__atomic64_add_return( ((s64)i),(v))))
+#define atomic64_sub(i,v)	((void)(__atomic64_add_return(-((s64)i),(v))))
+#define atomic64_inc(v)		((void)(__atomic64_add_return(   1,(v))))
+#define atomic64_dec(v)		((void)(__atomic64_add_return(  -1,(v))))
+
+#define atomic64_add_return(i,v)	(__atomic64_add_return( ((s64)i),(v)))
+#define atomic64_sub_return(i,v)	(__atomic64_add_return(-((s64)i),(v)))
+#define atomic64_inc_return(v)		(__atomic64_add_return(   1,(v)))
+#define atomic64_dec_return(v)		(__atomic64_add_return(  -1,(v)))
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+
+#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i),(v)) == 0)
+
+/* exported interface */
+#define atomic64_cmpxchg(v, o, n) \
+	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long c, old;
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic64_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#endif /* CONFIG_64BIT */
+
+#include <asm-generic/atomic.h>
+
+#endif /* _ASM_PARISC_ATOMIC_H_ */
diff --git a/arch/parisc/include/asm/auxvec.h b/arch/parisc/include/asm/auxvec.h
new file mode 100644
index 000000000000..9c3ac4b89dc9
--- /dev/null
+++ b/arch/parisc/include/asm/auxvec.h
@@ -0,0 +1,4 @@
+#ifndef __ASMPARISC_AUXVEC_H
+#define __ASMPARISC_AUXVEC_H
+
+#endif
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
new file mode 100644
index 000000000000..7a6ea10bd231
--- /dev/null
+++ b/arch/parisc/include/asm/bitops.h
@@ -0,0 +1,239 @@
+#ifndef _PARISC_BITOPS_H
+#define _PARISC_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+/*
+ * HP-PARISC specific bit operations
+ * for a detailed description of the functions please refer
+ * to include/asm-i386/bitops.h or kerneldoc
+ */
+
+#define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1))
+
+
+#define smp_mb__before_clear_bit()      smp_mb()
+#define smp_mb__after_clear_bit()       smp_mb()
+
+/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
+ * on use of volatile and __*_bit() (set/clear/change):
+ *	*_bit() want use of volatile.
+ *	__*_bit() are "relaxed" and don't use spinlock or volatile.
+ */
+
+static __inline__ void set_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	*addr |= mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+}
+
+static __inline__ void clear_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = ~(1UL << CHOP_SHIFTCOUNT(nr));
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	*addr &= mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+}
+
+static __inline__ void change_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	*addr ^= mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+}
+
+static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
+	unsigned long old;
+	unsigned long flags;
+	int set;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	old = *addr;
+	set = (old & mask) ? 1 : 0;
+	if (!set)
+		*addr = old | mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+
+	return set;
+}
+
+static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
+	unsigned long old;
+	unsigned long flags;
+	int set;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	old = *addr;
+	set = (old & mask) ? 1 : 0;
+	if (set)
+		*addr = old & ~mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+
+	return set;
+}
+
+static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
+{
+	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
+	unsigned long oldbit;
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	_atomic_spin_lock_irqsave(addr, flags);
+	oldbit = *addr;
+	*addr = oldbit ^ mask;
+	_atomic_spin_unlock_irqrestore(addr, flags);
+
+	return (oldbit & mask) ? 1 : 0;
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#ifdef __KERNEL__
+
+/**
+ * __ffs - find first bit in word. returns 0 to "BITS_PER_LONG-1".
+ * @word: The word to search
+ *
+ * __ffs() return is undefined if no bit is set.
+ *
+ * 32-bit fast __ffs by LaMont Jones "lamont At hp com".
+ * 64-bit enhancement by Grant Grundler "grundler At parisc-linux org".
+ * (with help from willy/jejb to get the semantics right)
+ *
+ * This algorithm avoids branches by making use of nullification.
+ * One side effect of "extr" instructions is it sets PSW[N] bit.
+ * How PSW[N] (nullify next insn) gets set is determined by the 
+ * "condition" field (eg "<>" or "TR" below) in the extr* insn.
+ * Only the 1st and one of either the 2cd or 3rd insn will get executed.
+ * Each set of 3 insn will get executed in 2 cycles on PA8x00 vs 16 or so
+ * cycles for each mispredicted branch.
+ */
+
+static __inline__ unsigned long __ffs(unsigned long x)
+{
+	unsigned long ret;
+
+	__asm__(
+#ifdef CONFIG_64BIT
+		" ldi       63,%1\n"
+		" extrd,u,*<>  %0,63,32,%%r0\n"
+		" extrd,u,*TR  %0,31,32,%0\n"	/* move top 32-bits down */
+		" addi    -32,%1,%1\n"
+#else
+		" ldi       31,%1\n"
+#endif
+		" extru,<>  %0,31,16,%%r0\n"
+		" extru,TR  %0,15,16,%0\n"	/* xxxx0000 -> 0000xxxx */
+		" addi    -16,%1,%1\n"
+		" extru,<>  %0,31,8,%%r0\n"
+		" extru,TR  %0,23,8,%0\n"	/* 0000xx00 -> 000000xx */
+		" addi    -8,%1,%1\n"
+		" extru,<>  %0,31,4,%%r0\n"
+		" extru,TR  %0,27,4,%0\n"	/* 000000x0 -> 0000000x */
+		" addi    -4,%1,%1\n"
+		" extru,<>  %0,31,2,%%r0\n"
+		" extru,TR  %0,29,2,%0\n"	/* 0000000y, 1100b -> 0011b */
+		" addi    -2,%1,%1\n"
+		" extru,=  %0,31,1,%%r0\n"	/* check last bit */
+		" addi    -1,%1,%1\n"
+			: "+r" (x), "=r" (ret) );
+	return ret;
+}
+
+#include <asm-generic/bitops/ffz.h>
+
+/*
+ * ffs: find first bit set. returns 1 to BITS_PER_LONG or 0 (if none set)
+ * This is defined the same way as the libc and compiler builtin
+ * ffs routines, therefore differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+	return x ? (__ffs((unsigned long)x) + 1) : 0;
+}
+
+/*
+ * fls: find last (most significant) bit set.
+ * fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+
+static __inline__ int fls(int x)
+{
+	int ret;
+	if (!x)
+		return 0;
+
+	__asm__(
+	"	ldi		1,%1\n"
+	"	extru,<>	%0,15,16,%%r0\n"
+	"	zdep,TR		%0,15,16,%0\n"		/* xxxx0000 */
+	"	addi		16,%1,%1\n"
+	"	extru,<>	%0,7,8,%%r0\n"
+	"	zdep,TR		%0,23,24,%0\n"		/* xx000000 */
+	"	addi		8,%1,%1\n"
+	"	extru,<>	%0,3,4,%%r0\n"
+	"	zdep,TR		%0,27,28,%0\n"		/* x0000000 */
+	"	addi		4,%1,%1\n"
+	"	extru,<>	%0,1,2,%%r0\n"
+	"	zdep,TR		%0,29,30,%0\n"		/* y0000000 (y&3 = 0) */
+	"	addi		2,%1,%1\n"
+	"	extru,=		%0,0,1,%%r0\n"
+	"	addi		1,%1,%1\n"		/* if y & 8, add 1 */
+		: "+r" (x), "=r" (ret) );
+
+	return ret;
+}
+
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+
+#endif /* __KERNEL__ */
+
+#include <asm-generic/bitops/find.h>
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+
+/* '3' is bits per byte */
+#define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3)
+
+#define ext2_set_bit_atomic(l,nr,addr) \
+		test_and_set_bit((nr)   ^ LE_BYTE_ADDR, (unsigned long *)addr)
+#define ext2_clear_bit_atomic(l,nr,addr) \
+		test_and_clear_bit( (nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
+
+#endif	/* __KERNEL__ */
+
+#include <asm-generic/bitops/minix-le.h>
+
+#endif /* _PARISC_BITOPS_H */
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
new file mode 100644
index 000000000000..8cfc553fc837
--- /dev/null
+++ b/arch/parisc/include/asm/bug.h
@@ -0,0 +1,92 @@
+#ifndef _PARISC_BUG_H
+#define _PARISC_BUG_H
+
+/*
+ * Tell the user there is some problem.
+ * The offending file and line are encoded in the __bug_table section.
+ */
+
+#ifdef CONFIG_BUG
+#define HAVE_ARCH_BUG
+#define HAVE_ARCH_WARN_ON
+
+/* the break instruction is used as BUG() marker.  */
+#define	PARISC_BUG_BREAK_ASM	"break 0x1f, 0x1fff"
+#define	PARISC_BUG_BREAK_INSN	0x03ffe01f  /* PARISC_BUG_BREAK_ASM */
+
+#if defined(CONFIG_64BIT)
+#define ASM_WORD_INSN		".dword\t"
+#else
+#define ASM_WORD_INSN		".word\t"
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define BUG()								\
+	do {								\
+		asm volatile("\n"					\
+			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
+			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
+			     "\t.short %c1, %c2\n"			\
+			     "\t.org 2b+%c3\n"				\
+			     "\t.popsection"				\
+			     : : "i" (__FILE__), "i" (__LINE__),	\
+			     "i" (0), "i" (sizeof(struct bug_entry)) ); \
+		for(;;) ;						\
+	} while(0)
+
+#else
+#define BUG()								\
+	do {								\
+		asm volatile(PARISC_BUG_BREAK_ASM : : );		\
+		for(;;) ;						\
+	} while(0)
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define __WARN()							\
+	do {								\
+		asm volatile("\n"					\
+			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
+			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
+			     "\t.short %c1, %c2\n"			\
+			     "\t.org 2b+%c3\n"				\
+			     "\t.popsection"				\
+			     : : "i" (__FILE__), "i" (__LINE__),	\
+			     "i" (BUGFLAG_WARNING),			\
+			     "i" (sizeof(struct bug_entry)) );		\
+	} while(0)
+#else
+#define __WARN()							\
+	do {								\
+		asm volatile("\n"					\
+			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
+			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "2:\t" ASM_WORD_INSN "1b\n"		\
+			     "\t.short %c0\n"				\
+			     "\t.org 2b+%c1\n"				\
+			     "\t.popsection"				\
+			     : : "i" (BUGFLAG_WARNING),			\
+			     "i" (sizeof(struct bug_entry)) );		\
+	} while(0)
+#endif
+
+
+#define WARN_ON(x) ({						\
+	int __ret_warn_on = !!(x);				\
+	if (__builtin_constant_p(__ret_warn_on)) {		\
+		if (__ret_warn_on)				\
+			__WARN();				\
+	} else {						\
+		if (unlikely(__ret_warn_on))			\
+			__WARN();				\
+	}							\
+	unlikely(__ret_warn_on);				\
+})
+
+#endif
+
+#include <asm-generic/bug.h>
+#endif
+
diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h
new file mode 100644
index 000000000000..9e6284342a5f
--- /dev/null
+++ b/arch/parisc/include/asm/bugs.h
@@ -0,0 +1,19 @@
+/*
+ *  include/asm-parisc/bugs.h
+ *
+ *  Copyright (C) 1999	Mike Shaver
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *	void check_bugs(void);
+ */
+
+#include <asm/processor.h>
+
+static inline void check_bugs(void)
+{
+//	identify_cpu(&boot_cpu_data);
+}
diff --git a/arch/parisc/include/asm/byteorder.h b/arch/parisc/include/asm/byteorder.h
new file mode 100644
index 000000000000..db148313de5d
--- /dev/null
+++ b/arch/parisc/include/asm/byteorder.h
@@ -0,0 +1,82 @@
+#ifndef _PARISC_BYTEORDER_H
+#define _PARISC_BYTEORDER_H
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __GNUC__
+
+static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x)
+{
+	__asm__("dep %0, 15, 8, %0\n\t"		/* deposit 00ab -> 0bab */
+		"shd %%r0, %0, 8, %0"		/* shift 000000ab -> 00ba */
+		: "=r" (x)
+		: "0" (x));
+	return x;
+}
+
+static __inline__ __attribute_const__ __u32 ___arch__swab24(__u32 x)
+{
+	__asm__("shd %0, %0, 8, %0\n\t"		/* shift xabcxabc -> cxab */
+		"dep %0, 15, 8, %0\n\t"		/* deposit cxab -> cbab */
+		"shd %%r0, %0, 8, %0"		/* shift 0000cbab -> 0cba */
+		: "=r" (x)
+		: "0" (x));
+	return x;
+}
+
+static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x)
+{
+	unsigned int temp;
+	__asm__("shd %0, %0, 16, %1\n\t"	/* shift abcdabcd -> cdab */
+		"dep %1, 15, 8, %1\n\t"		/* deposit cdab -> cbab */
+		"shd %0, %1, 8, %0"		/* shift abcdcbab -> dcba */
+		: "=r" (x), "=&r" (temp)
+		: "0" (x));
+	return x;
+}
+
+
+#if BITS_PER_LONG > 32
+/*
+** From "PA-RISC 2.0 Architecture", HP Professional Books.
+** See Appendix I page 8 , "Endian Byte Swapping".
+**
+** Pretty cool algorithm: (* == zero'd bits)
+**      PERMH   01234567 -> 67452301 into %0
+**      HSHL    67452301 -> 7*5*3*1* into %1
+**      HSHR    67452301 -> *6*4*2*0 into %0
+**      OR      %0 | %1  -> 76543210 into %0 (all done!)
+*/
+static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x) {
+	__u64 temp;
+	__asm__("permh,3210 %0, %0\n\t"
+		"hshl %0, 8, %1\n\t"
+		"hshr,u %0, 8, %0\n\t"
+		"or %1, %0, %0"
+		: "=r" (x), "=&r" (temp)
+		: "0" (x));
+	return x;
+}
+#define __arch__swab64(x) ___arch__swab64(x)
+#define __BYTEORDER_HAS_U64__
+#elif !defined(__STRICT_ANSI__)
+static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x)
+{
+	__u32 t1 = ___arch__swab32((__u32) x);
+	__u32 t2 = ___arch__swab32((__u32) (x >> 32));
+	return (((__u64) t1 << 32) | t2);
+}
+#define __arch__swab64(x) ___arch__swab64(x)
+#define __BYTEORDER_HAS_U64__
+#endif
+
+#define __arch__swab16(x) ___arch__swab16(x)
+#define __arch__swab24(x) ___arch__swab24(x)
+#define __arch__swab32(x) ___arch__swab32(x)
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _PARISC_BYTEORDER_H */
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
new file mode 100644
index 000000000000..32c2cca74345
--- /dev/null
+++ b/arch/parisc/include/asm/cache.h
@@ -0,0 +1,60 @@
+/*
+ * include/asm-parisc/cache.h
+ */
+
+#ifndef __ARCH_PARISC_CACHE_H
+#define __ARCH_PARISC_CACHE_H
+
+
+/*
+ * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have
+ * 32-byte cachelines.  The default configuration is not for SMP anyway,
+ * so if you're building for SMP, you should select the appropriate
+ * processor type.  There is a potential livelock danger when running
+ * a machine with this value set too small, but it's more probable you'll
+ * just ruin performance.
+ */
+#ifdef CONFIG_PA20
+#define L1_CACHE_BYTES 64
+#define L1_CACHE_SHIFT 6
+#else
+#define L1_CACHE_BYTES 32
+#define L1_CACHE_SHIFT 5
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
+
+#define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+
+void parisc_cache_init(void);	/* initializes cache-flushing */
+void disable_sr_hashing_asm(int); /* low level support for above */
+void disable_sr_hashing(void);   /* turns off space register hashing */
+void free_sid(unsigned long);
+unsigned long alloc_sid(void);
+
+struct seq_file;
+extern void show_cache_info(struct seq_file *m);
+
+extern int split_tlb;
+extern int dcache_stride;
+extern int icache_stride;
+extern struct pdc_cache_info cache_info;
+void parisc_setup_cache_timing(void);
+
+#define pdtlb(addr)         asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr));
+#define pitlb(addr)         asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr));
+#define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)" : : "r" (addr));
+
+#endif /* ! __ASSEMBLY__ */
+
+/* Classes of processor wrt: disabling space register hashing */
+
+#define SRHASH_PCXST    0   /* pcxs, pcxt, pcxt_ */
+#define SRHASH_PCXL     1   /* pcxl */
+#define SRHASH_PA20     2   /* pcxu, pcxu_, pcxw, pcxw_ */
+
+#endif
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
new file mode 100644
index 000000000000..b7ca6dc7fddc
--- /dev/null
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -0,0 +1,121 @@
+#ifndef _PARISC_CACHEFLUSH_H
+#define _PARISC_CACHEFLUSH_H
+
+#include <linux/mm.h>
+
+/* The usual comment is "Caches aren't brain-dead on the <architecture>".
+ * Unfortunately, that doesn't apply to PA-RISC. */
+
+/* Internal implementation */
+void flush_data_cache_local(void *);  /* flushes local data-cache only */
+void flush_instruction_cache_local(void *); /* flushes local code-cache only */
+#ifdef CONFIG_SMP
+void flush_data_cache(void); /* flushes data-cache only (all processors) */
+void flush_instruction_cache(void); /* flushes i-cache only (all processors) */
+#else
+#define flush_data_cache() flush_data_cache_local(NULL)
+#define flush_instruction_cache() flush_instruction_cache_local(NULL)
+#endif
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+void flush_user_icache_range_asm(unsigned long, unsigned long);
+void flush_kernel_icache_range_asm(unsigned long, unsigned long);
+void flush_user_dcache_range_asm(unsigned long, unsigned long);
+void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
+void flush_kernel_dcache_page_asm(void *);
+void flush_kernel_icache_page(void *);
+void flush_user_dcache_page(unsigned long);
+void flush_user_icache_page(unsigned long);
+void flush_user_dcache_range(unsigned long, unsigned long);
+void flush_user_icache_range(unsigned long, unsigned long);
+
+/* Cache flush operations */
+
+void flush_cache_all_local(void);
+void flush_cache_all(void);
+void flush_cache_mm(struct mm_struct *mm);
+
+#define flush_kernel_dcache_range(start,size) \
+	flush_kernel_dcache_range_asm((start), (start)+(size));
+
+#define flush_cache_vmap(start, end)		flush_cache_all()
+#define flush_cache_vunmap(start, end)		flush_cache_all()
+
+extern void flush_dcache_page(struct page *page);
+
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
+
+#define flush_icache_page(vma,page)	do { 		\
+	flush_kernel_dcache_page(page);			\
+	flush_kernel_icache_page(page_address(page)); 	\
+} while (0)
+
+#define flush_icache_range(s,e)		do { 		\
+	flush_kernel_dcache_range_asm(s,e); 		\
+	flush_kernel_icache_range_asm(s,e); 		\
+} while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+do { \
+	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
+	memcpy(dst, src, len); \
+	flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
+} while (0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+do { \
+	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
+	memcpy(dst, src, len); \
+} while (0)
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void flush_cache_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end);
+
+#define ARCH_HAS_FLUSH_ANON_PAGE
+static inline void
+flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+	if (PageAnon(page))
+		flush_user_dcache_page(vmaddr);
+}
+
+#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+void flush_kernel_dcache_page_addr(void *addr);
+static inline void flush_kernel_dcache_page(struct page *page)
+{
+	flush_kernel_dcache_page_addr(page_address(page));
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
+#ifdef CONFIG_PA8X00
+/* Only pa8800, pa8900 needs this */
+#define ARCH_HAS_KMAP
+
+void kunmap_parisc(void *addr);
+
+static inline void *kmap(struct page *page)
+{
+	might_sleep();
+	return page_address(page);
+}
+
+#define kunmap(page)			kunmap_parisc(page_address(page))
+
+#define kmap_atomic(page, idx)		page_address(page)
+
+#define kunmap_atomic(addr, idx)	kunmap_parisc(addr)
+
+#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
+#define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
+#endif
+
+#endif /* _PARISC_CACHEFLUSH_H */
+
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
new file mode 100644
index 000000000000..e9639ccc3fce
--- /dev/null
+++ b/arch/parisc/include/asm/checksum.h
@@ -0,0 +1,210 @@
+#ifndef _PARISC_CHECKSUM_H
+#define _PARISC_CHECKSUM_H
+
+#include <linux/in6.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *, int, __wsum);
+
+/*
+ * The same as csum_partial, but copies from src while it checksums.
+ *
+ * Here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum);
+
+/*
+ * this is a new version of the above that records errors it finds in *errp,
+ * but continues and zeros the rest of the buffer.
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src,
+		void *dst, int len, __wsum sum, int *errp);
+
+/*
+ *	Optimized for IP headers, which always checksum on 4 octet boundaries.
+ *
+ *	Written by Randolph Chung <tausq@debian.org>, and then mucked with by
+ *	LaMont Jones <lamont@debian.org>
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned int sum;
+
+	__asm__ __volatile__ (
+"	ldws,ma		4(%1), %0\n"
+"	addib,<=	-4, %2, 2f\n"
+"\n"
+"	ldws		4(%1), %%r20\n"
+"	ldws		8(%1), %%r21\n"
+"	add		%0, %%r20, %0\n"
+"	ldws,ma		12(%1), %%r19\n"
+"	addc		%0, %%r21, %0\n"
+"	addc		%0, %%r19, %0\n"
+"1:	ldws,ma		4(%1), %%r19\n"
+"	addib,<		0, %2, 1b\n"
+"	addc		%0, %%r19, %0\n"
+"\n"
+"	extru		%0, 31, 16, %%r20\n"
+"	extru		%0, 15, 16, %%r21\n"
+"	addc		%%r20, %%r21, %0\n"
+"	extru		%0, 15, 16, %%r21\n"
+"	add		%0, %%r21, %0\n"
+"	subi		-1, %0, %0\n"
+"2:\n"
+	: "=r" (sum), "=r" (iph), "=r" (ihl)
+	: "1" (iph), "2" (ihl)
+	: "r19", "r20", "r21", "memory");
+
+	return (__force __sum16)sum;
+}
+
+/*
+ *	Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	/* add the swapped two 16-bit halves of sum,
+	   a possible carry from adding the two 16-bit halves,
+	   will carry from the lower half into the upper half,
+	   giving us the correct sum in the upper half. */
+	sum += (sum << 16) + (sum >> 16);
+	return (__force __sum16)(~sum >> 16);
+}
+ 
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					       unsigned short len,
+					       unsigned short proto,
+					       __wsum sum)
+{
+	__asm__(
+	"	add  %1, %0, %0\n"
+	"	addc %2, %0, %0\n"
+	"	addc %3, %0, %0\n"
+	"	addc %%r0, %0, %0\n"
+		: "=r" (sum)
+		: "r" (daddr), "r"(saddr), "r"(proto+len), "0"(sum));
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+						   unsigned short len,
+						   unsigned short proto,
+						   __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+static inline __sum16 ip_compute_csum(const void *buf, int len)
+{
+	 return csum_fold (csum_partial(buf, len, 0));
+}
+
+
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+					  const struct in6_addr *daddr,
+					  __u32 len, unsigned short proto,
+					  __wsum sum)
+{
+	__asm__ __volatile__ (
+
+#if BITS_PER_LONG > 32
+
+	/*
+	** We can execute two loads and two adds per cycle on PA 8000.
+	** But add insn's get serialized waiting for the carry bit.
+	** Try to keep 4 registers with "live" values ahead of the ALU.
+	*/
+
+"	ldd,ma		8(%1), %%r19\n"	/* get 1st saddr word */
+"	ldd,ma		8(%2), %%r20\n"	/* get 1st daddr word */
+"	add		%8, %3, %3\n"/* add 16-bit proto + len */
+"	add		%%r19, %0, %0\n"
+"	ldd,ma		8(%1), %%r21\n"	/* 2cd saddr */
+"	ldd,ma		8(%2), %%r22\n"	/* 2cd daddr */
+"	add,dc		%%r20, %0, %0\n"
+"	add,dc		%%r21, %0, %0\n"
+"	add,dc		%%r22, %0, %0\n"
+"	add,dc		%3, %0, %0\n"  /* fold in proto+len | carry bit */
+"	extrd,u		%0, 31, 32, %%r19\n"	/* copy upper half down */
+"	depdi		0, 31, 32, %0\n"	/* clear upper half */
+"	add		%%r19, %0, %0\n"	/* fold into 32-bits */
+"	addc		0, %0, %0\n"		/* add carry */
+
+#else
+
+	/*
+	** For PA 1.x, the insn order doesn't matter as much.
+	** Insn stream is serialized on the carry bit here too.
+	** result from the previous operation (eg r0 + x)
+	*/
+
+"	ldw,ma		4(%1), %%r19\n"	/* get 1st saddr word */
+"	ldw,ma		4(%2), %%r20\n"	/* get 1st daddr word */
+"	add		%8, %3, %3\n"	/* add 16-bit proto + len */
+"	add		%%r19, %0, %0\n"
+"	ldw,ma		4(%1), %%r21\n"	/* 2cd saddr */
+"	addc		%%r20, %0, %0\n"
+"	ldw,ma		4(%2), %%r22\n"	/* 2cd daddr */
+"	addc		%%r21, %0, %0\n"
+"	ldw,ma		4(%1), %%r19\n"	/* 3rd saddr */
+"	addc		%%r22, %0, %0\n"
+"	ldw,ma		4(%2), %%r20\n"	/* 3rd daddr */
+"	addc		%%r19, %0, %0\n"
+"	ldw,ma		4(%1), %%r21\n"	/* 4th saddr */
+"	addc		%%r20, %0, %0\n"
+"	ldw,ma		4(%2), %%r22\n"	/* 4th daddr */
+"	addc		%%r21, %0, %0\n"
+"	addc		%%r22, %0, %0\n"
+"	addc		%3, %0, %0\n"	/* fold in proto+len, catch carry */
+
+#endif
+	: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
+	: "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
+	: "r19", "r20", "r21", "r22");
+	return csum_fold(sum);
+}
+
+/* 
+ *	Copy and checksum to user
+ */
+#define HAVE_CSUM_COPY_USER
+static __inline__ __wsum csum_and_copy_to_user(const void *src,
+						      void __user *dst,
+						      int len, __wsum sum,
+						      int *err_ptr)
+{
+	/* code stolen from include/asm-mips64 */
+	sum = csum_partial(src, len, sum);
+	 
+	if (copy_to_user(dst, src, len)) {
+		*err_ptr = -EFAULT;
+		return (__force __wsum)-1;
+	}
+
+	return sum;
+}
+
+#endif
+
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
new file mode 100644
index 000000000000..7f32611a7a5e
--- /dev/null
+++ b/arch/parisc/include/asm/compat.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_PARISC_COMPAT_H
+#define _ASM_PARISC_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#define COMPAT_USER_HZ 100
+
+typedef u32	compat_size_t;
+typedef s32	compat_ssize_t;
+typedef s32	compat_time_t;
+typedef s32	compat_clock_t;
+typedef s32	compat_pid_t;
+typedef u32	__compat_uid_t;
+typedef u32	__compat_gid_t;
+typedef u32	__compat_uid32_t;
+typedef u32	__compat_gid32_t;
+typedef u16	compat_mode_t;
+typedef u32	compat_ino_t;
+typedef u32	compat_dev_t;
+typedef s32	compat_off_t;
+typedef s64	compat_loff_t;
+typedef u16	compat_nlink_t;
+typedef u16	compat_ipc_pid_t;
+typedef s32	compat_daddr_t;
+typedef u32	compat_caddr_t;
+typedef s32	compat_timer_t;
+
+typedef s32	compat_int_t;
+typedef s32	compat_long_t;
+typedef s64	compat_s64;
+typedef u32	compat_uint_t;
+typedef u32	compat_ulong_t;
+typedef u64	compat_u64;
+
+struct compat_timespec {
+	compat_time_t		tv_sec;
+	s32			tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t		tv_sec;
+	s32			tv_usec;
+};
+
+struct compat_stat {
+	compat_dev_t		st_dev;	/* dev_t is 32 bits on parisc */
+	compat_ino_t		st_ino;	/* 32 bits */
+	compat_mode_t		st_mode;	/* 16 bits */
+	compat_nlink_t  	st_nlink;	/* 16 bits */
+	u16			st_reserved1;	/* old st_uid */
+	u16			st_reserved2;	/* old st_gid */
+	compat_dev_t		st_rdev;
+	compat_off_t		st_size;
+	compat_time_t		st_atime;
+	u32			st_atime_nsec;
+	compat_time_t		st_mtime;
+	u32			st_mtime_nsec;
+	compat_time_t		st_ctime;
+	u32			st_ctime_nsec;
+	s32			st_blksize;
+	s32			st_blocks;
+	u32			__unused1;	/* ACL stuff */
+	compat_dev_t		__unused2;	/* network */
+	compat_ino_t		__unused3;	/* network */
+	u32			__unused4;	/* cnodes */
+	u16			__unused5;	/* netsite */
+	short			st_fstype;
+	compat_dev_t		st_realdev;
+	u16			st_basemode;
+	u16			st_spareshort;
+	__compat_uid32_t	st_uid;
+	__compat_gid32_t	st_gid;
+	u32			st_spare4[3];
+};
+
+struct compat_flock {
+	short			l_type;
+	short			l_whence;
+	compat_off_t		l_start;
+	compat_off_t		l_len;
+	compat_pid_t		l_pid;
+};
+
+struct compat_flock64 {
+	short			l_type;
+	short			l_whence;
+	compat_loff_t		l_start;
+	compat_loff_t		l_len;
+	compat_pid_t		l_pid;
+};
+
+struct compat_statfs {
+	s32		f_type;
+	s32		f_bsize;
+	s32		f_blocks;
+	s32		f_bfree;
+	s32		f_bavail;
+	s32		f_files;
+	s32		f_ffree;
+	__kernel_fsid_t	f_fsid;
+	s32		f_namelen;
+	s32		f_frsize;
+	s32		f_spare[5];
+};
+
+struct compat_sigcontext {
+	compat_int_t sc_flags;
+	compat_int_t sc_gr[32]; /* PSW in sc_gr[0] */
+	u64 sc_fr[32];
+	compat_int_t sc_iasq[2];
+	compat_int_t sc_iaoq[2];
+	compat_int_t sc_sar; /* cr11 */
+};
+
+#define COMPAT_RLIM_INFINITY 0xffffffff
+
+typedef u32		compat_old_sigset_t;	/* at least 32 bits */
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32		compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+typedef	u32		compat_uptr_t;
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)uptr;
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
+static __inline__ void __user *compat_alloc_user_space(long len)
+{
+	struct pt_regs *regs = &current->thread.regs;
+	return (void __user *)regs->gr[30];
+}
+
+static inline int __is_compat_task(struct task_struct *t)
+{
+	return test_ti_thread_flag(task_thread_info(t), TIF_32BIT);
+}
+
+static inline int is_compat_task(void)
+{
+	return __is_compat_task(current);
+}
+
+#endif /* _ASM_PARISC_COMPAT_H */
diff --git a/arch/parisc/include/asm/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h
new file mode 100644
index 000000000000..81bec28bdc48
--- /dev/null
+++ b/arch/parisc/include/asm/compat_rt_sigframe.h
@@ -0,0 +1,50 @@
+#include<linux/compat.h>
+#include<linux/compat_siginfo.h>
+#include<asm/compat_ucontext.h>
+
+#ifndef _ASM_PARISC_COMPAT_RT_SIGFRAME_H
+#define _ASM_PARISC_COMPAT_RT_SIGFRAME_H
+
+/* In a deft move of uber-hackery, we decide to carry the top half of all
+ * 64-bit registers in a non-portable, non-ABI, hidden structure.
+ * Userspace can read the hidden structure if it *wants* but is never
+ * guaranteed to be in the same place. Infact the uc_sigmask from the 
+ * ucontext_t structure may push the hidden register file downards
+ */
+struct compat_regfile {
+	/* Upper half of all the 64-bit registers that were truncated
+	   on a copy to a 32-bit userspace */
+	compat_int_t rf_gr[32];
+	compat_int_t rf_iasq[2];
+	compat_int_t rf_iaoq[2];
+	compat_int_t rf_sar;
+};
+
+#define COMPAT_SIGRETURN_TRAMP 4
+#define COMPAT_SIGRESTARTBLOCK_TRAMP 5 
+#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + COMPAT_SIGRESTARTBLOCK_TRAMP)
+
+struct compat_rt_sigframe {
+	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
+	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
+		trampoline we left on the stack (we were bad and didn't 
+		change sp so we could run really fast.) */
+	compat_uint_t tramp[COMPAT_TRAMP_SIZE];
+	compat_siginfo_t info;
+	struct compat_ucontext uc;
+	/* Hidden location of truncated registers, *must* be last. */
+	struct compat_regfile regs; 
+};
+
+/*
+ * The 32-bit ABI wants at least 48 bytes for a function call frame:
+ * 16 bytes for arg0-arg3, and 32 bytes for magic (the only part of
+ * which Linux/parisc uses is sp-20 for the saved return pointer...)
+ * Then, the stack pointer must be rounded to a cache line (64 bytes).
+ */
+#define SIGFRAME32		64
+#define FUNCTIONCALLFRAME32	48
+#define PARISC_RT_SIGFRAME_SIZE32					\
+	(((sizeof(struct compat_rt_sigframe) + FUNCTIONCALLFRAME32) + SIGFRAME32) & -SIGFRAME32)
+
+#endif
diff --git a/arch/parisc/include/asm/compat_signal.h b/arch/parisc/include/asm/compat_signal.h
new file mode 100644
index 000000000000..6ad02c360b21
--- /dev/null
+++ b/arch/parisc/include/asm/compat_signal.h
@@ -0,0 +1,2 @@
+/* Use generic */
+#include <asm-generic/compat_signal.h>
diff --git a/arch/parisc/include/asm/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h
new file mode 100644
index 000000000000..2f7292afde3c
--- /dev/null
+++ b/arch/parisc/include/asm/compat_ucontext.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_PARISC_COMPAT_UCONTEXT_H
+#define _ASM_PARISC_COMPAT_UCONTEXT_H
+
+#include <linux/compat.h>
+
+/* 32-bit ucontext as seen from an 64-bit kernel */
+struct compat_ucontext {
+	compat_uint_t uc_flags;
+	compat_uptr_t uc_link;
+	compat_stack_t uc_stack;	/* struct compat_sigaltstack (12 bytes)*/	
+	/* FIXME: Pad out to get uc_mcontext to start at an 8-byte aligned boundary */
+	compat_uint_t pad[1];
+	struct compat_sigcontext uc_mcontext;
+	compat_sigset_t uc_sigmask;	/* mask last for extensibility */
+};
+
+#endif /* !_ASM_PARISC_COMPAT_UCONTEXT_H */
diff --git a/arch/parisc/include/asm/cputime.h b/arch/parisc/include/asm/cputime.h
new file mode 100644
index 000000000000..dcdf2fbd7e72
--- /dev/null
+++ b/arch/parisc/include/asm/cputime.h
@@ -0,0 +1,6 @@
+#ifndef __PARISC_CPUTIME_H
+#define __PARISC_CPUTIME_H
+
+#include <asm-generic/cputime.h>
+
+#endif /* __PARISC_CPUTIME_H */
diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h
new file mode 100644
index 000000000000..0fb9338e3bf2
--- /dev/null
+++ b/arch/parisc/include/asm/current.h
@@ -0,0 +1,15 @@
+#ifndef _PARISC_CURRENT_H
+#define _PARISC_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+static inline struct task_struct * get_current(void)
+{
+	return current_thread_info()->task;
+}
+ 
+#define current get_current()
+
+#endif /* !(_PARISC_CURRENT_H) */
diff --git a/arch/parisc/include/asm/delay.h b/arch/parisc/include/asm/delay.h
new file mode 100644
index 000000000000..7a75e984674b
--- /dev/null
+++ b/arch/parisc/include/asm/delay.h
@@ -0,0 +1,43 @@
+#ifndef _PARISC_DELAY_H
+#define _PARISC_DELAY_H
+
+#include <asm/system.h>    /* for mfctl() */
+#include <asm/processor.h> /* for boot_cpu_data */
+
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines
+ */
+
+static __inline__ void __delay(unsigned long loops) {
+	asm volatile(
+	"	.balignl	64,0x34000034\n"
+	"	addib,UV -1,%0,.\n"
+	"	nop\n"
+		: "=r" (loops) : "0" (loops));
+}
+
+static __inline__ void __cr16_delay(unsigned long clocks) {
+	unsigned long start;
+
+	/*
+	 * Note: Due to unsigned math, cr16 rollovers shouldn't be
+	 * a problem here. However, on 32 bit, we need to make sure
+	 * we don't pass in too big a value. The current default
+	 * value of MAX_UDELAY_MS should help prevent this.
+	 */
+
+	start = mfctl(16);
+	while ((mfctl(16) - start) < clocks)
+	    ;
+}
+
+static __inline__ void __udelay(unsigned long usecs) {
+	__cr16_delay(usecs * ((unsigned long)boot_cpu_data.cpu_hz / 1000000UL));
+}
+
+#define udelay(n) __udelay(n)
+
+#endif /* defined(_PARISC_DELAY_H) */
diff --git a/arch/parisc/include/asm/device.h b/arch/parisc/include/asm/device.h
new file mode 100644
index 000000000000..d8f9872b0e2d
--- /dev/null
+++ b/arch/parisc/include/asm/device.h
@@ -0,0 +1,7 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#include <asm-generic/device.h>
+
diff --git a/arch/parisc/include/asm/div64.h b/arch/parisc/include/asm/div64.h
new file mode 100644
index 000000000000..6cd978cefb28
--- /dev/null
+++ b/arch/parisc/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..53af696f23d2
--- /dev/null
+++ b/arch/parisc/include/asm/dma-mapping.h
@@ -0,0 +1,253 @@
+#ifndef _PARISC_DMA_MAPPING_H
+#define _PARISC_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/scatterlist.h>
+
+/* See Documentation/DMA-mapping.txt */
+struct hppa_dma_ops {
+	int  (*dma_supported)(struct device *dev, u64 mask);
+	void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
+	void *(*alloc_noncoherent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
+	void (*free_consistent)(struct device *dev, size_t size, void *vaddr, dma_addr_t iova);
+	dma_addr_t (*map_single)(struct device *dev, void *addr, size_t size, enum dma_data_direction direction);
+	void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction);
+	int  (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction);
+	void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nhwents, enum dma_data_direction direction);
+	void (*dma_sync_single_for_cpu)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
+	void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
+	void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
+	void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
+};
+
+/*
+** We could live without the hppa_dma_ops indirection if we didn't want
+** to support 4 different coherent dma models with one binary (they will
+** someday be loadable modules):
+**     I/O MMU        consistent method           dma_sync behavior
+**  =============   ======================       =======================
+**  a) PA-7x00LC    uncachable host memory          flush/purge
+**  b) U2/Uturn      cachable host memory              NOP
+**  c) Ike/Astro     cachable host memory              NOP
+**  d) EPIC/SAGA     memory on EPIC/SAGA         flush/reset DMA channel
+**
+** PA-7[13]00LC processors have a GSC bus interface and no I/O MMU.
+**
+** Systems (eg PCX-T workstations) that don't fall into the above
+** categories will need to modify the needed drivers to perform
+** flush/purge and allocate "regular" cacheable pages for everything.
+*/
+
+#ifdef CONFIG_PA11
+extern struct hppa_dma_ops pcxl_dma_ops;
+extern struct hppa_dma_ops pcx_dma_ops;
+#endif
+
+extern struct hppa_dma_ops *hppa_dma_ops;
+
+static inline void *
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		   gfp_t flag)
+{
+	return hppa_dma_ops->alloc_consistent(dev, size, dma_handle, flag);
+}
+
+static inline void *
+dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		      gfp_t flag)
+{
+	return hppa_dma_ops->alloc_noncoherent(dev, size, dma_handle, flag);
+}
+
+static inline void
+dma_free_coherent(struct device *dev, size_t size, 
+		    void *vaddr, dma_addr_t dma_handle)
+{
+	hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle);
+}
+
+static inline void
+dma_free_noncoherent(struct device *dev, size_t size, 
+		    void *vaddr, dma_addr_t dma_handle)
+{
+	hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle);
+}
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+	       enum dma_data_direction direction)
+{
+	return hppa_dma_ops->map_single(dev, ptr, size, direction);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	hppa_dma_ops->unmap_single(dev, dma_addr, size, direction);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	   enum dma_data_direction direction)
+{
+	return hppa_dma_ops->map_sg(dev, sg, nents, direction);
+}
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	hppa_dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+	     size_t size, enum dma_data_direction direction)
+{
+	return dma_map_single(dev, (page_address(page) + (offset)), size, direction);
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	dma_unmap_single(dev, dma_address, size, direction);
+}
+
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
+		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, 0, size, direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_device)
+		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, 0, size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
+		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, offset, size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_device)
+		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, offset, size, direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_sg_for_cpu)
+		hppa_dma_ops->dma_sync_sg_for_cpu(dev, sg, nelems, direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_sg_for_device)
+		hppa_dma_ops->dma_sync_sg_for_device(dev, sg, nelems, direction);
+}
+
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+	return hppa_dma_ops->dma_supported(dev, mask);
+}
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	if(!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
+static inline int
+dma_get_cache_alignment(void)
+{
+	return dcache_stride;
+}
+
+static inline int
+dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
+{
+	return (hppa_dma_ops->dma_sync_single_for_cpu == NULL);
+}
+
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+	       enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
+		flush_kernel_dcache_range((unsigned long)vaddr, size);
+}
+
+static inline void *
+parisc_walk_tree(struct device *dev)
+{
+	struct device *otherdev;
+	if(likely(dev->platform_data != NULL))
+		return dev->platform_data;
+	/* OK, just traverse the bus to find it */
+	for(otherdev = dev->parent; otherdev;
+	    otherdev = otherdev->parent) {
+		if(otherdev->platform_data) {
+			dev->platform_data = otherdev->platform_data;
+			break;
+		}
+	}
+	BUG_ON(!dev->platform_data);
+	return dev->platform_data;
+}
+		
+#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu);	
+	
+
+#ifdef CONFIG_IOMMU_CCIO
+struct parisc_device;
+struct ioc;
+void * ccio_get_iommu(const struct parisc_device *dev);
+int ccio_request_resource(const struct parisc_device *dev,
+		struct resource *res);
+int ccio_allocate_resource(const struct parisc_device *dev,
+		struct resource *res, unsigned long size,
+		unsigned long min, unsigned long max, unsigned long align);
+#else /* !CONFIG_IOMMU_CCIO */
+#define ccio_get_iommu(dev) NULL
+#define ccio_request_resource(dev, res) insert_resource(&iomem_resource, res)
+#define ccio_allocate_resource(dev, res, size, min, max, align) \
+		allocate_resource(&iomem_resource, res, size, min, max, \
+				align, NULL, NULL)
+#endif /* !CONFIG_IOMMU_CCIO */
+
+#ifdef CONFIG_IOMMU_SBA
+struct parisc_device;
+void * sba_get_iommu(struct parisc_device *dev);
+#endif
+
+/* At the moment, we panic on error for IOMMU resource exaustion */
+#define dma_mapping_error(dev, x)	0
+
+#endif
diff --git a/arch/parisc/include/asm/dma.h b/arch/parisc/include/asm/dma.h
new file mode 100644
index 000000000000..31ad0f05af3d
--- /dev/null
+++ b/arch/parisc/include/asm/dma.h
@@ -0,0 +1,186 @@
+/* $Id: dma.h,v 1.2 1999/04/27 00:46:18 deller Exp $
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ * (c) Copyright 2000, Grant Grundler
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <asm/io.h>		/* need byte IO */
+#include <asm/system.h>	
+
+#define dma_outb	outb
+#define dma_inb		inb
+
+/*
+** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
+** (or rather not merge) DMAs into manageable chunks.
+** On parisc, this is more of the software/tuning constraint
+** rather than the HW. I/O MMU allocation algorithms can be
+** faster with smaller sizes (to some degree).
+*/
+#define DMA_CHUNK_SIZE	(BITS_PER_LONG*PAGE_SIZE)
+
+/* The maximum address that we can perform a DMA transfer to on this platform
+** New dynamic DMA interfaces should obsolete this....
+*/
+#define MAX_DMA_ADDRESS (~0UL)
+
+/*
+** We don't have DMA channels... well V-class does but the
+** Dynamic DMA Mapping interface will support them... right? :^)
+** Note: this is not relevant right now for PA-RISC, but we cannot 
+** leave this as undefined because some things (e.g. sound)
+** won't compile :-(
+*/
+#define MAX_DMA_CHANNELS 8
+#define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_CASCADE 0xC0	/* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT	0x10
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG		0x08	/* command register (w) */
+#define DMA1_STAT_REG		0x08	/* status register (r) */
+#define DMA1_REQ_REG            0x09    /* request register (w) */
+#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
+#define DMA1_MODE_REG		0x0B	/* mode register (w) */
+#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
+#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
+#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
+#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
+#define DMA1_EXT_MODE_REG	(0x400 | DMA1_MODE_REG)
+
+#define DMA2_CMD_REG		0xD0	/* command register (w) */
+#define DMA2_STAT_REG		0xD0	/* status register (r) */
+#define DMA2_REQ_REG            0xD2    /* request register (w) */
+#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
+#define DMA2_MODE_REG		0xD6	/* mode register (w) */
+#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
+#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
+#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
+#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
+#define DMA2_EXT_MODE_REG	(0x400 | DMA2_MODE_REG)
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+	return 0;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+	unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
+					 : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
+
+	/* using short to get 16-bit wrap around */
+	unsigned short count;
+
+	count = 1 + dma_inb(io_port);
+	count += dma_inb(io_port) << 8;
+	
+	return (dmanr<=3)? count : (count<<1);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+#ifdef CONFIG_SUPERIO
+	if (dmanr<=3)
+		dma_outb(dmanr,  DMA1_MASK_REG);
+	else
+		dma_outb(dmanr & 3,  DMA2_MASK_REG);
+#endif
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+#ifdef CONFIG_SUPERIO
+	if (dmanr<=3)
+		dma_outb(dmanr | 4,  DMA1_MASK_REG);
+	else
+		dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
+#endif
+}
+
+/* reserve a DMA channel */
+#define request_dma(dmanr, device_id)	(0)
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+}
+
+
+#define free_dma(dmanr)
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy 	(0)
+#endif
+
+#endif /* _ASM_DMA_H */
diff --git a/arch/parisc/include/asm/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h
new file mode 100644
index 000000000000..201085f83dd5
--- /dev/null
+++ b/arch/parisc/include/asm/eisa_bus.h
@@ -0,0 +1,23 @@
+/*
+ * eisa_bus.h interface between the eisa BA driver and the bus enumerator
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2002 Daniel Engstrom <5116@telia.com>
+ *
+ */
+
+#ifndef ASM_EISA_H
+#define ASM_EISA_H
+
+extern void eisa_make_irq_level(int num);
+extern void eisa_make_irq_edge(int num);
+extern int eisa_enumerator(unsigned long eeprom_addr,
+			   struct resource *io_parent, 
+			   struct resource *mem_parent);
+extern int eisa_eeprom_init(unsigned long addr);
+
+#endif
diff --git a/arch/parisc/include/asm/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h
new file mode 100644
index 000000000000..9c9da980402a
--- /dev/null
+++ b/arch/parisc/include/asm/eisa_eeprom.h
@@ -0,0 +1,153 @@
+/*
+ * eisa_eeprom.h - provide support for EISA adapters in PA-RISC machines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2001, 2002 Daniel Engstrom <5116@telia.com>
+ *
+ */
+
+#ifndef ASM_EISA_EEPROM_H
+#define ASM_EISA_EEPROM_H
+
+extern void __iomem *eisa_eeprom_addr;
+
+#define HPEE_MAX_LENGTH       0x2000	/* maximum eeprom length */
+
+#define HPEE_SLOT_INFO(slot) (20+(48*slot))
+
+struct eeprom_header 
+{
+   
+	u_int32_t num_writes;       /* number of writes */
+ 	u_int8_t  flags;            /* flags, usage? */
+	u_int8_t  ver_maj;
+	u_int8_t  ver_min;
+	u_int8_t  num_slots;        /* number of EISA slots in system */
+	u_int16_t csum;             /* checksum, I don't know how to calulate this */
+	u_int8_t  pad[10];
+} __attribute__ ((packed));
+
+
+struct eeprom_eisa_slot_info
+{
+	u_int32_t eisa_slot_id;
+	u_int32_t config_data_offset;
+	u_int32_t num_writes;
+	u_int16_t csum;
+	u_int16_t num_functions;
+	u_int16_t config_data_length;
+	
+	/* bits 0..3 are the duplicate slot id */ 
+#define HPEE_SLOT_INFO_EMBEDDED  0x10
+#define HPEE_SLOT_INFO_VIRTUAL   0x20
+#define HPEE_SLOT_INFO_NO_READID 0x40
+#define HPEE_SLOT_INFO_DUPLICATE 0x80
+	u_int8_t slot_info;
+	
+#define HPEE_SLOT_FEATURES_ENABLE         0x01
+#define HPEE_SLOT_FEATURES_IOCHK          0x02
+#define HPEE_SLOT_FEATURES_CFG_INCOMPLETE 0x80
+	u_int8_t slot_features;
+	
+	u_int8_t  ver_min;
+	u_int8_t  ver_maj;
+	
+#define HPEE_FUNCTION_INFO_HAVE_TYPE      0x01
+#define HPEE_FUNCTION_INFO_HAVE_MEMORY    0x02
+#define HPEE_FUNCTION_INFO_HAVE_IRQ       0x04
+#define HPEE_FUNCTION_INFO_HAVE_DMA       0x08
+#define HPEE_FUNCTION_INFO_HAVE_PORT      0x10
+#define HPEE_FUNCTION_INFO_HAVE_PORT_INIT 0x20
+/* I think there are two slighty different 
+ * versions of the function_info field 
+ * one int the fixed header and one optional 
+ * in the parsed slot data area */
+#define HPEE_FUNCTION_INFO_HAVE_FUNCTION  0x01
+#define HPEE_FUNCTION_INFO_F_DISABLED     0x80
+#define HPEE_FUNCTION_INFO_CFG_FREE_FORM  0x40
+	u_int8_t  function_info;
+
+#define HPEE_FLAG_BOARD_IS_ISA		  0x01 /* flag and minor version for isa board */
+	u_int8_t  flags;
+	u_int8_t  pad[24];
+} __attribute__ ((packed));
+
+
+#define HPEE_MEMORY_MAX_ENT   9
+/* memory descriptor: byte 0 */
+#define HPEE_MEMORY_WRITABLE  0x01
+#define HPEE_MEMORY_CACHABLE  0x02
+#define HPEE_MEMORY_TYPE_MASK 0x18
+#define HPEE_MEMORY_TYPE_SYS  0x00
+#define HPEE_MEMORY_TYPE_EXP  0x08
+#define HPEE_MEMORY_TYPE_VIR  0x10
+#define HPEE_MEMORY_TYPE_OTH  0x18
+#define HPEE_MEMORY_SHARED    0x20
+#define HPEE_MEMORY_MORE      0x80
+
+/* memory descriptor: byte 1 */
+#define HPEE_MEMORY_WIDTH_MASK 0x03
+#define HPEE_MEMORY_WIDTH_BYTE 0x00
+#define HPEE_MEMORY_WIDTH_WORD 0x01
+#define HPEE_MEMORY_WIDTH_DWORD 0x02
+#define HPEE_MEMORY_DECODE_MASK 0x0c
+#define HPEE_MEMORY_DECODE_20BITS 0x00
+#define HPEE_MEMORY_DECODE_24BITS 0x04
+#define HPEE_MEMORY_DECODE_32BITS 0x08
+/* byte 2 and 3 are a 16bit LE value
+ * containging the memory size in kilobytes */
+/* byte 4,5,6 are a 24bit LE value
+ * containing the memory base address */
+
+
+#define HPEE_IRQ_MAX_ENT      7
+/* Interrupt entry: byte 0 */
+#define HPEE_IRQ_CHANNEL_MASK 0xf
+#define HPEE_IRQ_TRIG_LEVEL   0x20
+#define HPEE_IRQ_MORE         0x80
+/* byte 1 seems to be unused */
+
+#define HPEE_DMA_MAX_ENT     4
+
+/* dma entry: byte 0 */
+#define HPEE_DMA_CHANNEL_MASK 7
+#define HPEE_DMA_SIZE_MASK	0xc
+#define HPEE_DMA_SIZE_BYTE	0x0
+#define HPEE_DMA_SIZE_WORD	0x4
+#define HPEE_DMA_SIZE_DWORD	0x8
+#define HPEE_DMA_SHARED      0x40
+#define HPEE_DMA_MORE        0x80
+
+/* dma entry: byte 1 */
+#define HPEE_DMA_TIMING_MASK 0x30
+#define HPEE_DMA_TIMING_ISA	0x0
+#define HPEE_DMA_TIMING_TYPEA 0x10
+#define HPEE_DMA_TIMING_TYPEB 0x20
+#define HPEE_DMA_TIMING_TYPEC 0x30
+
+#define HPEE_PORT_MAX_ENT 20
+/* port entry byte 0 */
+#define HPEE_PORT_SIZE_MASK 0x1f
+#define HPEE_PORT_SHARED    0x40
+#define HPEE_PORT_MORE      0x80
+/* byte 1 and 2 is a 16bit LE value
+ * conating the start port number */
+
+#define HPEE_PORT_INIT_MAX_LEN     60 /* in bytes here */
+/* port init entry byte 0 */
+#define HPEE_PORT_INIT_WIDTH_MASK  0x3
+#define HPEE_PORT_INIT_WIDTH_BYTE  0x0
+#define HPEE_PORT_INIT_WIDTH_WORD  0x1
+#define HPEE_PORT_INIT_WIDTH_DWORD 0x2
+#define HPEE_PORT_INIT_MASK        0x4
+#define HPEE_PORT_INIT_MORE        0x80
+
+#define HPEE_SELECTION_MAX_ENT 26
+
+#define HPEE_TYPE_MAX_LEN    80
+
+#endif
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
new file mode 100644
index 000000000000..d0a4a8262818
--- /dev/null
+++ b/arch/parisc/include/asm/elf.h
@@ -0,0 +1,342 @@
+#ifndef __ASMPARISC_ELF_H
+#define __ASMPARISC_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+
+#define EM_PARISC 15
+
+/* HPPA specific definitions.  */
+
+/* Legal values for e_flags field of Elf32_Ehdr.  */
+
+#define EF_PARISC_TRAPNIL	0x00010000 /* Trap nil pointer dereference.  */
+#define EF_PARISC_EXT		0x00020000 /* Program uses arch. extensions. */
+#define EF_PARISC_LSB		0x00040000 /* Program expects little endian. */
+#define EF_PARISC_WIDE		0x00080000 /* Program expects wide mode.  */
+#define EF_PARISC_NO_KABP	0x00100000 /* No kernel assisted branch
+					      prediction.  */
+#define EF_PARISC_LAZYSWAP	0x00400000 /* Allow lazy swapping.  */
+#define EF_PARISC_ARCH		0x0000ffff /* Architecture version.  */
+
+/* Defined values for `e_flags & EF_PARISC_ARCH' are:  */
+
+#define EFA_PARISC_1_0		    0x020b /* PA-RISC 1.0 big-endian.  */
+#define EFA_PARISC_1_1		    0x0210 /* PA-RISC 1.1 big-endian.  */
+#define EFA_PARISC_2_0		    0x0214 /* PA-RISC 2.0 big-endian.  */
+
+/* Additional section indices.  */
+
+#define SHN_PARISC_ANSI_COMMON	0xff00	   /* Section for tenatively declared
+					      symbols in ANSI C.  */
+#define SHN_PARISC_HUGE_COMMON	0xff01	   /* Common blocks in huge model.  */
+
+/* Legal values for sh_type field of Elf32_Shdr.  */
+
+#define SHT_PARISC_EXT		0x70000000 /* Contains product specific ext. */
+#define SHT_PARISC_UNWIND	0x70000001 /* Unwind information.  */
+#define SHT_PARISC_DOC		0x70000002 /* Debug info for optimized code. */
+
+/* Legal values for sh_flags field of Elf32_Shdr.  */
+
+#define SHF_PARISC_SHORT	0x20000000 /* Section with short addressing. */
+#define SHF_PARISC_HUGE		0x40000000 /* Section far from gp.  */
+#define SHF_PARISC_SBP		0x80000000 /* Static branch prediction code. */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type).  */
+
+#define STT_PARISC_MILLICODE	13	/* Millicode function entry point.  */
+
+#define STT_HP_OPAQUE		(STT_LOOS + 0x1)
+#define STT_HP_STUB		(STT_LOOS + 0x2)
+
+/* HPPA relocs.  */
+
+#define R_PARISC_NONE		0	/* No reloc.  */
+#define R_PARISC_DIR32		1	/* Direct 32-bit reference.  */
+#define R_PARISC_DIR21L		2	/* Left 21 bits of eff. address.  */
+#define R_PARISC_DIR17R		3	/* Right 17 bits of eff. address.  */
+#define R_PARISC_DIR17F		4	/* 17 bits of eff. address.  */
+#define R_PARISC_DIR14R		6	/* Right 14 bits of eff. address.  */
+#define R_PARISC_PCREL32	9	/* 32-bit rel. address.  */
+#define R_PARISC_PCREL21L	10	/* Left 21 bits of rel. address.  */
+#define R_PARISC_PCREL17R	11	/* Right 17 bits of rel. address.  */
+#define R_PARISC_PCREL17F	12	/* 17 bits of rel. address.  */
+#define R_PARISC_PCREL14R	14	/* Right 14 bits of rel. address.  */
+#define R_PARISC_DPREL21L	18	/* Left 21 bits of rel. address.  */
+#define R_PARISC_DPREL14R	22	/* Right 14 bits of rel. address.  */
+#define R_PARISC_GPREL21L	26	/* GP-relative, left 21 bits.  */
+#define R_PARISC_GPREL14R	30	/* GP-relative, right 14 bits.  */
+#define R_PARISC_LTOFF21L	34	/* LT-relative, left 21 bits.  */
+#define R_PARISC_LTOFF14R	38	/* LT-relative, right 14 bits.  */
+#define R_PARISC_SECREL32	41	/* 32 bits section rel. address.  */
+#define R_PARISC_SEGBASE	48	/* No relocation, set segment base.  */
+#define R_PARISC_SEGREL32	49	/* 32 bits segment rel. address.  */
+#define R_PARISC_PLTOFF21L	50	/* PLT rel. address, left 21 bits.  */
+#define R_PARISC_PLTOFF14R	54	/* PLT rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_FPTR32	57	/* 32 bits LT-rel. function pointer. */
+#define R_PARISC_LTOFF_FPTR21L	58	/* LT-rel. fct ptr, left 21 bits. */
+#define R_PARISC_LTOFF_FPTR14R	62	/* LT-rel. fct ptr, right 14 bits. */
+#define R_PARISC_FPTR64		64	/* 64 bits function address.  */
+#define R_PARISC_PLABEL32	65	/* 32 bits function address.  */
+#define R_PARISC_PCREL64	72	/* 64 bits PC-rel. address.  */
+#define R_PARISC_PCREL22F	74	/* 22 bits PC-rel. address.  */
+#define R_PARISC_PCREL14WR	75	/* PC-rel. address, right 14 bits.  */
+#define R_PARISC_PCREL14DR	76	/* PC rel. address, right 14 bits.  */
+#define R_PARISC_PCREL16F	77	/* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16WF	78	/* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16DF	79	/* 16 bits PC-rel. address.  */
+#define R_PARISC_DIR64		80	/* 64 bits of eff. address.  */
+#define R_PARISC_DIR14WR	83	/* 14 bits of eff. address.  */
+#define R_PARISC_DIR14DR	84	/* 14 bits of eff. address.  */
+#define R_PARISC_DIR16F		85	/* 16 bits of eff. address.  */
+#define R_PARISC_DIR16WF	86	/* 16 bits of eff. address.  */
+#define R_PARISC_DIR16DF	87	/* 16 bits of eff. address.  */
+#define R_PARISC_GPREL64	88	/* 64 bits of GP-rel. address.  */
+#define R_PARISC_GPREL14WR	91	/* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL14DR	92	/* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL16F	93	/* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16WF	94	/* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16DF	95	/* 16 bits GP-rel. address.  */
+#define R_PARISC_LTOFF64	96	/* 64 bits LT-rel. address.  */
+#define R_PARISC_LTOFF14WR	99	/* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF14DR	100	/* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF16F	101	/* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16WF	102	/* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16DF	103	/* 16 bits LT-rel. address.  */
+#define R_PARISC_SECREL64	104	/* 64 bits section rel. address.  */
+#define R_PARISC_SEGREL64	112	/* 64 bits segment rel. address.  */
+#define R_PARISC_PLTOFF14WR	115	/* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF14DR	116	/* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF16F	117	/* 16 bits LT-rel. address.  */
+#define R_PARISC_PLTOFF16WF	118	/* 16 bits PLT-rel. address.  */
+#define R_PARISC_PLTOFF16DF	119	/* 16 bits PLT-rel. address.  */
+#define R_PARISC_LTOFF_FPTR64	120	/* 64 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR14WR	123	/* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR14DR	124	/* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR16F	125	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16WF	126	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16DF	127	/* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LORESERVE	128
+#define R_PARISC_COPY		128	/* Copy relocation.  */
+#define R_PARISC_IPLT		129	/* Dynamic reloc, imported PLT */
+#define R_PARISC_EPLT		130	/* Dynamic reloc, exported PLT */
+#define R_PARISC_TPREL32	153	/* 32 bits TP-rel. address.  */
+#define R_PARISC_TPREL21L	154	/* TP-rel. address, left 21 bits.  */
+#define R_PARISC_TPREL14R	158	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_TP21L	162	/* LT-TP-rel. address, left 21 bits. */
+#define R_PARISC_LTOFF_TP14R	166	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14F	167	/* 14 bits LT-TP-rel. address.  */
+#define R_PARISC_TPREL64	216	/* 64 bits TP-rel. address.  */
+#define R_PARISC_TPREL14WR	219	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL14DR	220	/* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL16F	221	/* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16WF	222	/* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16DF	223	/* 16 bits TP-rel. address.  */
+#define R_PARISC_LTOFF_TP64	224	/* 64 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP14WR	227	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14DR	228	/* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP16F	229	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16WF	230	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16DF	231	/* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_HIRESERVE	255
+
+#define PA_PLABEL_FDESC		0x02	/* bit set if PLABEL points to
+					 * a function descriptor, not
+					 * an address */
+
+/* The following are PA function descriptors 
+ *
+ * addr:	the absolute address of the function
+ * gp:		either the data pointer (r27) for non-PIC code or the
+ *		the PLT pointer (r19) for PIC code */
+
+/* Format for the Elf32 Function descriptor */
+typedef struct elf32_fdesc {
+	__u32	addr;
+	__u32	gp;
+} Elf32_Fdesc;
+
+/* Format for the Elf64 Function descriptor */
+typedef struct elf64_fdesc {
+	__u64	dummy[2]; /* FIXME: nothing uses these, why waste
+			   * the space */
+	__u64	addr;
+	__u64	gp;
+} Elf64_Fdesc;
+
+/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
+
+#define PT_HP_TLS		(PT_LOOS + 0x0)
+#define PT_HP_CORE_NONE		(PT_LOOS + 0x1)
+#define PT_HP_CORE_VERSION	(PT_LOOS + 0x2)
+#define PT_HP_CORE_KERNEL	(PT_LOOS + 0x3)
+#define PT_HP_CORE_COMM		(PT_LOOS + 0x4)
+#define PT_HP_CORE_PROC		(PT_LOOS + 0x5)
+#define PT_HP_CORE_LOADABLE	(PT_LOOS + 0x6)
+#define PT_HP_CORE_STACK	(PT_LOOS + 0x7)
+#define PT_HP_CORE_SHM		(PT_LOOS + 0x8)
+#define PT_HP_CORE_MMF		(PT_LOOS + 0x9)
+#define PT_HP_PARALLEL		(PT_LOOS + 0x10)
+#define PT_HP_FASTBIND		(PT_LOOS + 0x11)
+#define PT_HP_OPT_ANNOT		(PT_LOOS + 0x12)
+#define PT_HP_HSL_ANNOT		(PT_LOOS + 0x13)
+#define PT_HP_STACK		(PT_LOOS + 0x14)
+
+#define PT_PARISC_ARCHEXT	0x70000000
+#define PT_PARISC_UNWIND	0x70000001
+
+/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr.  */
+
+#define PF_PARISC_SBP		0x08000000
+
+#define PF_HP_PAGE_SIZE		0x00100000
+#define PF_HP_FAR_SHARED	0x00200000
+#define PF_HP_NEAR_SHARED	0x00400000
+#define PF_HP_CODE		0x01000000
+#define PF_HP_MODIFY		0x02000000
+#define PF_HP_LAZYSWAP		0x04000000
+#define PF_HP_SBP		0x08000000
+
+/*
+ * The following definitions are those for 32-bit ELF binaries on a 32-bit
+ * kernel and for 64-bit binaries on a 64-bit kernel.  To run 32-bit binaries
+ * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these
+ * macros appropriately and then #includes binfmt_elf.c, which then includes
+ * this file.
+ */
+#ifndef ELF_CLASS
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ *
+ * Note that this header file is used by default in fs/binfmt_elf.c. So
+ * the following macros are for the default case. However, for the 64
+ * bit kernel we also support 32 bit parisc binaries. To do that
+ * arch/parisc/kernel/binfmt_elf32.c defines its own set of these
+ * macros, and then it includes fs/binfmt_elf.c to provide an alternate
+ * elf binary handler for 32 bit binaries (on the 64 bit kernel).
+ */
+#ifdef CONFIG_64BIT
+#define ELF_CLASS   ELFCLASS64
+#else
+#define ELF_CLASS	ELFCLASS32
+#endif
+
+typedef unsigned long elf_greg_t;
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+
+#define ELF_PLATFORM  ("PARISC\0")
+
+#define SET_PERSONALITY(ex, ibcs2) \
+	current->personality = PER_LINUX; \
+	current->thread.map_base = DEFAULT_MAP_BASE; \
+	current->thread.task_size = DEFAULT_TASK_SIZE \
+
+/*
+ * Fill in general registers in a core dump.  This saves pretty
+ * much the same registers as hp-ux, although in a different order.
+ * Registers marked # below are not currently saved in pt_regs, so
+ * we use their current values here.
+ *
+ * 	gr0..gr31
+ * 	sr0..sr7
+ * 	iaoq0..iaoq1
+ * 	iasq0..iasq1
+ * 	cr11 (sar)
+ * 	cr19 (iir)
+ * 	cr20 (isr)
+ * 	cr21 (ior)
+ *  #	cr22 (ipsw)
+ *  #	cr0 (recovery counter)
+ *  #	cr24..cr31 (temporary registers)
+ *  #	cr8,9,12,13 (protection IDs)
+ *  #	cr10 (scr/ccr)
+ *  #	cr15 (ext int enable mask)
+ *
+ */
+
+#define ELF_CORE_COPY_REGS(dst, pt)	\
+	memset(dst, 0, sizeof(dst));	/* don't leak any "random" bits */ \
+	memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \
+	memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \
+	memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \
+	memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \
+	dst[44] = pt->sar;   dst[45] = pt->iir; \
+	dst[46] = pt->isr;   dst[47] = pt->ior; \
+	dst[48] = mfctl(22); dst[49] = mfctl(0); \
+	dst[50] = mfctl(24); dst[51] = mfctl(25); \
+	dst[52] = mfctl(26); dst[53] = mfctl(27); \
+	dst[54] = mfctl(28); dst[55] = mfctl(29); \
+	dst[56] = mfctl(30); dst[57] = mfctl(31); \
+	dst[58] = mfctl( 8); dst[59] = mfctl( 9); \
+	dst[60] = mfctl(12); dst[61] = mfctl(13); \
+	dst[62] = mfctl(10); dst[63] = mfctl(15);
+
+#endif /* ! ELF_CLASS */
+
+#define ELF_NGREG 80	/* We only need 64 at present, but leave space
+			   for expansion. */
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+#define ELF_NFPREG 32
+typedef double elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+struct task_struct;
+
+extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
+#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
+
+struct pt_regs;	/* forward declaration... */
+
+
+#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_DATA	ELFDATA2MSB
+#define ELF_ARCH	EM_PARISC
+#define ELF_OSABI 	ELFOSABI_LINUX
+
+/* %r23 is set by ld.so to a pointer to a function which might be 
+   registered using atexit.  This provides a means for the dynamic
+   linker to call DT_FINI functions for shared libraries that have
+   been loaded before the code runs.
+
+   So that we can use the same startup file with static executables,
+   we start programs with a value of 0 to indicate that there is no
+   such function.  */
+#define ELF_PLAT_INIT(_r, load_addr)       _r->gr[23] = 0
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.
+
+   (2 * TASK_SIZE / 3) turns into something undefined when run through a
+   32 bit preprocessor and in some cases results in the kernel trying to map
+   ld.so to the kernel virtual base. Use a sane value instead. /Jes 
+  */
+
+#define ELF_ET_DYN_BASE         (TASK_UNMAPPED_BASE + 0x01000000)
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports.  This could be done in user space,
+   but it's not easy, and we've already done it here.  */
+
+#define ELF_HWCAP	0
+
+#endif
diff --git a/arch/parisc/include/asm/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h
new file mode 100644
index 000000000000..108d8c48e42e
--- /dev/null
+++ b/arch/parisc/include/asm/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_EMERGENCY_RESTART_H
+#define _ASM_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/parisc/include/asm/errno.h b/arch/parisc/include/asm/errno.h
new file mode 100644
index 000000000000..e2f3ddc796be
--- /dev/null
+++ b/arch/parisc/include/asm/errno.h
@@ -0,0 +1,124 @@
+#ifndef _PARISC_ERRNO_H
+#define _PARISC_ERRNO_H
+
+#include <asm-generic/errno-base.h>
+
+#define	ENOMSG		35	/* No message of desired type */
+#define	EIDRM		36	/* Identifier removed */
+#define	ECHRNG		37	/* Channel number out of range */
+#define	EL2NSYNC	38	/* Level 2 not synchronized */
+#define	EL3HLT		39	/* Level 3 halted */
+#define	EL3RST		40	/* Level 3 reset */
+#define	ELNRNG		41	/* Link number out of range */
+#define	EUNATCH		42	/* Protocol driver not attached */
+#define	ENOCSI		43	/* No CSI structure available */
+#define	EL2HLT		44	/* Level 2 halted */
+#define	EDEADLK		45	/* Resource deadlock would occur */
+#define	EDEADLOCK	EDEADLK
+#define	ENOLCK		46	/* No record locks available */
+#define	EILSEQ		47	/* Illegal byte sequence */
+
+#define	ENONET		50	/* Machine is not on the network */
+#define	ENODATA		51	/* No data available */
+#define	ETIME		52	/* Timer expired */
+#define	ENOSR		53	/* Out of streams resources */
+#define	ENOSTR		54	/* Device not a stream */
+#define	ENOPKG		55	/* Package not installed */
+
+#define	ENOLINK		57	/* Link has been severed */
+#define	EADV		58	/* Advertise error */
+#define	ESRMNT		59	/* Srmount error */
+#define	ECOMM		60	/* Communication error on send */
+#define	EPROTO		61	/* Protocol error */
+
+#define	EMULTIHOP	64	/* Multihop attempted */
+
+#define	EDOTDOT		66	/* RFS specific error */
+#define	EBADMSG		67	/* Not a data message */
+#define	EUSERS		68	/* Too many users */
+#define	EDQUOT		69	/* Quota exceeded */
+#define	ESTALE		70	/* Stale NFS file handle */
+#define	EREMOTE		71	/* Object is remote */
+#define	EOVERFLOW	72	/* Value too large for defined data type */
+
+/* these errnos are defined by Linux but not HPUX. */
+
+#define	EBADE		160	/* Invalid exchange */
+#define	EBADR		161	/* Invalid request descriptor */
+#define	EXFULL		162	/* Exchange full */
+#define	ENOANO		163	/* No anode */
+#define	EBADRQC		164	/* Invalid request code */
+#define	EBADSLT		165	/* Invalid slot */
+#define	EBFONT		166	/* Bad font file format */
+#define	ENOTUNIQ	167	/* Name not unique on network */
+#define	EBADFD		168	/* File descriptor in bad state */
+#define	EREMCHG		169	/* Remote address changed */
+#define	ELIBACC		170	/* Can not access a needed shared library */
+#define	ELIBBAD		171	/* Accessing a corrupted shared library */
+#define	ELIBSCN		172	/* .lib section in a.out corrupted */
+#define	ELIBMAX		173	/* Attempting to link in too many shared libraries */
+#define	ELIBEXEC	174	/* Cannot exec a shared library directly */
+#define	ERESTART	175	/* Interrupted system call should be restarted */
+#define	ESTRPIPE	176	/* Streams pipe error */
+#define	EUCLEAN		177	/* Structure needs cleaning */
+#define	ENOTNAM		178	/* Not a XENIX named type file */
+#define	ENAVAIL		179	/* No XENIX semaphores available */
+#define	EISNAM		180	/* Is a named type file */
+#define	EREMOTEIO	181	/* Remote I/O error */
+#define	ENOMEDIUM	182	/* No medium found */
+#define	EMEDIUMTYPE	183	/* Wrong medium type */
+#define	ENOKEY		184	/* Required key not available */
+#define	EKEYEXPIRED	185	/* Key has expired */
+#define	EKEYREVOKED	186	/* Key has been revoked */
+#define	EKEYREJECTED	187	/* Key was rejected by service */
+
+/* We now return you to your regularly scheduled HPUX. */
+
+#define ENOSYM		215	/* symbol does not exist in executable */
+#define	ENOTSOCK	216	/* Socket operation on non-socket */
+#define	EDESTADDRREQ	217	/* Destination address required */
+#define	EMSGSIZE	218	/* Message too long */
+#define	EPROTOTYPE	219	/* Protocol wrong type for socket */
+#define	ENOPROTOOPT	220	/* Protocol not available */
+#define	EPROTONOSUPPORT	221	/* Protocol not supported */
+#define	ESOCKTNOSUPPORT	222	/* Socket type not supported */
+#define	EOPNOTSUPP	223	/* Operation not supported on transport endpoint */
+#define	EPFNOSUPPORT	224	/* Protocol family not supported */
+#define	EAFNOSUPPORT	225	/* Address family not supported by protocol */
+#define	EADDRINUSE	226	/* Address already in use */
+#define	EADDRNOTAVAIL	227	/* Cannot assign requested address */
+#define	ENETDOWN	228	/* Network is down */
+#define	ENETUNREACH	229	/* Network is unreachable */
+#define	ENETRESET	230	/* Network dropped connection because of reset */
+#define	ECONNABORTED	231	/* Software caused connection abort */
+#define	ECONNRESET	232	/* Connection reset by peer */
+#define	ENOBUFS		233	/* No buffer space available */
+#define	EISCONN		234	/* Transport endpoint is already connected */
+#define	ENOTCONN	235	/* Transport endpoint is not connected */
+#define	ESHUTDOWN	236	/* Cannot send after transport endpoint shutdown */
+#define	ETOOMANYREFS	237	/* Too many references: cannot splice */
+#define EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
+#define	ETIMEDOUT	238	/* Connection timed out */
+#define	ECONNREFUSED	239	/* Connection refused */
+#define EREMOTERELEASE	240	/* Remote peer released connection */
+#define	EHOSTDOWN	241	/* Host is down */
+#define	EHOSTUNREACH	242	/* No route to host */
+
+#define	EALREADY	244	/* Operation already in progress */
+#define	EINPROGRESS	245	/* Operation now in progress */
+#define	EWOULDBLOCK	246	/* Operation would block (Linux returns EAGAIN) */
+#define	ENOTEMPTY	247	/* Directory not empty */
+#define	ENAMETOOLONG	248	/* File name too long */
+#define	ELOOP		249	/* Too many symbolic links encountered */
+#define	ENOSYS		251	/* Function not implemented */
+
+#define ENOTSUP		252	/* Function not implemented (POSIX.4 / HPUX) */
+#define ECANCELLED	253	/* aio request was canceled before complete (POSIX.4 / HPUX) */
+#define ECANCELED	ECANCELLED	/* SuSv3 and Solaris wants one 'L' */
+
+/* for robust mutexes */
+#define EOWNERDEAD	254	/* Owner died */
+#define ENOTRECOVERABLE	255	/* State not recoverable */
+
+
+#endif
diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h
new file mode 100644
index 000000000000..4d503a023ab2
--- /dev/null
+++ b/arch/parisc/include/asm/fb.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <asm/page.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+}
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
new file mode 100644
index 000000000000..1e1c824764ee
--- /dev/null
+++ b/arch/parisc/include/asm/fcntl.h
@@ -0,0 +1,39 @@
+#ifndef _PARISC_FCNTL_H
+#define _PARISC_FCNTL_H
+
+/* open/fcntl - O_SYNC is only implemented on blocks devices and on files
+   located on an ext2 file system */
+#define O_APPEND	000000010
+#define O_BLKSEEK	000000100 /* HPUX only */
+#define O_CREAT		000000400 /* not fcntl */
+#define O_EXCL		000002000 /* not fcntl */
+#define O_LARGEFILE	000004000
+#define O_SYNC		000100000
+#define O_NONBLOCK	000200004 /* HPUX has separate NDELAY & NONBLOCK */
+#define O_NOCTTY	000400000 /* not fcntl */
+#define O_DSYNC		001000000 /* HPUX only */
+#define O_RSYNC		002000000 /* HPUX only */
+#define O_NOATIME	004000000
+#define O_CLOEXEC	010000000 /* set close_on_exec */
+
+#define O_DIRECTORY	000010000 /* must be a directory */
+#define O_NOFOLLOW	000000200 /* don't follow links */
+#define O_INVISIBLE	004000000 /* invisible I/O, for DMAPI/XDSM */
+
+#define F_GETLK64	8
+#define F_SETLK64	9
+#define F_SETLKW64	10
+
+#define F_GETOWN	11	/*  for sockets. */
+#define F_SETOWN	12	/*  for sockets. */
+#define F_SETSIG	13	/*  for sockets. */
+#define F_GETSIG	14	/*  for sockets. */
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK		01
+#define F_WRLCK		02
+#define F_UNLCK		03
+
+#include <asm-generic/fcntl.h>
+
+#endif
diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h
new file mode 100644
index 000000000000..de3fe3a18229
--- /dev/null
+++ b/arch/parisc/include/asm/fixmap.h
@@ -0,0 +1,30 @@
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+/*
+ * This file defines the locations of the fixed mappings on parisc.
+ *
+ * All of the values in this file are machine virtual addresses.
+ *
+ * All of the values in this file must be <4GB (because of assembly
+ * loading restrictions).  If you place this region anywhere above
+ * __PAGE_OFFSET, you must adjust the memory map accordingly */
+
+/* The alias region is used in kernel space to do copy/clear to or
+ * from areas congruently mapped with user space.  It is 8MB large
+ * and must be 16MB aligned */
+#define TMPALIAS_MAP_START	((__PAGE_OFFSET) - 16*1024*1024)
+/* This is the kernel area for all maps (vmalloc, dma etc.)  most
+ * usually, it extends up to TMPALIAS_MAP_START.  Virtual addresses
+ * 0..GATEWAY_PAGE_SIZE are reserved for the gateway page */
+#define KERNEL_MAP_START	(GATEWAY_PAGE_SIZE)
+#define KERNEL_MAP_END		(TMPALIAS_MAP_START)
+
+#ifndef __ASSEMBLY__
+extern void *vmalloc_start;
+#define PCXL_DMA_MAP_SIZE	(8*1024*1024)
+#define VMALLOC_START		((unsigned long)vmalloc_start)
+#define VMALLOC_END		(KERNEL_MAP_END)
+#endif /*__ASSEMBLY__*/
+
+#endif /*_ASM_FIXMAP_H*/
diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
new file mode 100644
index 000000000000..4ca69f558fae
--- /dev/null
+++ b/arch/parisc/include/asm/floppy.h
@@ -0,0 +1,271 @@
+/*    Architecture specific parts of the Floppy driver
+ *
+ *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
+ *    Copyright (C) 2000 Matthew Wilcox (willy a debian . org)
+ *    Copyright (C) 2000 Dave Kennedy
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __ASM_PARISC_FLOPPY_H
+#define __ASM_PARISC_FLOPPY_H
+
+#include <linux/vmalloc.h>
+
+
+/*
+ * The DMA channel used by the floppy controller cannot access data at
+ * addresses >= 16MB
+ *
+ * Went back to the 1MB limit, as some people had problems with the floppy
+ * driver otherwise. It doesn't matter much for performance anyway, as most
+ * floppy accesses go through the track buffer.
+ */
+#define _CROSS_64KB(a,s,vdma) \
+(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+
+#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
+
+
+#define SW fd_routine[use_virtual_dma&1]
+#define CSW fd_routine[can_use_virtual_dma & 1]
+
+
+#define fd_inb(port)			readb(port)
+#define fd_outb(value, port)		writeb(value, port)
+
+#define fd_request_dma()        CSW._request_dma(FLOPPY_DMA,"floppy")
+#define fd_free_dma()           CSW._free_dma(FLOPPY_DMA)
+#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
+#define fd_free_irq()		free_irq(FLOPPY_IRQ, NULL)
+#define fd_get_dma_residue()    SW._get_dma_residue(FLOPPY_DMA)
+#define fd_dma_mem_alloc(size)	SW._dma_mem_alloc(size)
+#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io)
+
+#define FLOPPY_CAN_FALLBACK_ON_NODMA
+
+static int virtual_dma_count=0;
+static int virtual_dma_residue=0;
+static char *virtual_dma_addr=0;
+static int virtual_dma_mode=0;
+static int doing_pdma=0;
+
+static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
+{
+	register unsigned char st;
+
+#undef TRACE_FLPY_INT
+
+#ifdef TRACE_FLPY_INT
+	static int calls=0;
+	static int bytes=0;
+	static int dma_wait=0;
+#endif
+	if (!doing_pdma) {
+		floppy_interrupt(irq, dev_id, regs);
+		return;
+	}
+
+#ifdef TRACE_FLPY_INT
+	if(!calls)
+		bytes = virtual_dma_count;
+#endif
+
+	{
+		register int lcount;
+		register char *lptr = virtual_dma_addr;
+
+		for (lcount = virtual_dma_count; lcount; lcount--) {
+			st = fd_inb(virtual_dma_port+4) & 0xa0 ;
+			if (st != 0xa0) 
+				break;
+			if (virtual_dma_mode) {
+				fd_outb(*lptr, virtual_dma_port+5);
+			} else {
+				*lptr = fd_inb(virtual_dma_port+5);
+			}
+			lptr++;
+		}
+		virtual_dma_count = lcount;
+		virtual_dma_addr = lptr;
+		st = fd_inb(virtual_dma_port+4);
+	}
+
+#ifdef TRACE_FLPY_INT
+	calls++;
+#endif
+	if (st == 0x20)
+		return;
+	if (!(st & 0x20)) {
+		virtual_dma_residue += virtual_dma_count;
+		virtual_dma_count = 0;
+#ifdef TRACE_FLPY_INT
+		printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", 
+		       virtual_dma_count, virtual_dma_residue, calls, bytes,
+		       dma_wait);
+		calls = 0;
+		dma_wait=0;
+#endif
+		doing_pdma = 0;
+		floppy_interrupt(irq, dev_id, regs);
+		return;
+	}
+#ifdef TRACE_FLPY_INT
+	if (!virtual_dma_count)
+		dma_wait++;
+#endif
+}
+
+static void fd_disable_dma(void)
+{
+	if(! (can_use_virtual_dma & 1))
+		disable_dma(FLOPPY_DMA);
+	doing_pdma = 0;
+	virtual_dma_residue += virtual_dma_count;
+	virtual_dma_count=0;
+}
+
+static int vdma_request_dma(unsigned int dmanr, const char * device_id)
+{
+	return 0;
+}
+
+static void vdma_nop(unsigned int dummy)
+{
+}
+
+
+static int vdma_get_dma_residue(unsigned int dummy)
+{
+	return virtual_dma_count + virtual_dma_residue;
+}
+
+
+static int fd_request_irq(void)
+{
+	if(can_use_virtual_dma)
+		return request_irq(FLOPPY_IRQ, floppy_hardint,
+				   IRQF_DISABLED, "floppy", NULL);
+	else
+		return request_irq(FLOPPY_IRQ, floppy_interrupt,
+				   IRQF_DISABLED, "floppy", NULL);
+}
+
+static unsigned long dma_mem_alloc(unsigned long size)
+{
+	return __get_dma_pages(GFP_KERNEL, get_order(size));
+}
+
+
+static unsigned long vdma_mem_alloc(unsigned long size)
+{
+	return (unsigned long) vmalloc(size);
+
+}
+
+#define nodma_mem_alloc(size) vdma_mem_alloc(size)
+
+static void _fd_dma_mem_free(unsigned long addr, unsigned long size)
+{
+	if((unsigned int) addr >= (unsigned int) high_memory)
+		return vfree((void *)addr);
+	else
+		free_pages(addr, get_order(size));		
+}
+
+#define fd_dma_mem_free(addr, size)  _fd_dma_mem_free(addr, size) 
+
+static void _fd_chose_dma_mode(char *addr, unsigned long size)
+{
+	if(can_use_virtual_dma == 2) {
+		if((unsigned int) addr >= (unsigned int) high_memory ||
+		   virt_to_bus(addr) >= 0x1000000 ||
+		   _CROSS_64KB(addr, size, 0))
+			use_virtual_dma = 1;
+		else
+			use_virtual_dma = 0;
+	} else {
+		use_virtual_dma = can_use_virtual_dma & 1;
+	}
+}
+
+#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size)
+
+
+static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+	doing_pdma = 1;
+	virtual_dma_port = io;
+	virtual_dma_mode = (mode  == DMA_MODE_WRITE);
+	virtual_dma_addr = addr;
+	virtual_dma_count = size;
+	virtual_dma_residue = 0;
+	return 0;
+}
+
+static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+#ifdef FLOPPY_SANITY_CHECK
+	if (CROSS_64KB(addr, size)) {
+		printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
+		return -1;
+	}
+#endif
+	/* actual, physical DMA */
+	doing_pdma = 0;
+	clear_dma_ff(FLOPPY_DMA);
+	set_dma_mode(FLOPPY_DMA,mode);
+	set_dma_addr(FLOPPY_DMA,virt_to_bus(addr));
+	set_dma_count(FLOPPY_DMA,size);
+	enable_dma(FLOPPY_DMA);
+	return 0;
+}
+
+static struct fd_routine_l {
+	int (*_request_dma)(unsigned int dmanr, const char * device_id);
+	void (*_free_dma)(unsigned int dmanr);
+	int (*_get_dma_residue)(unsigned int dummy);
+	unsigned long (*_dma_mem_alloc) (unsigned long size);
+	int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
+} fd_routine[] = {
+	{
+		request_dma,
+		free_dma,
+		get_dma_residue,
+		dma_mem_alloc,
+		hard_dma_setup
+	},
+	{
+		vdma_request_dma,
+		vdma_nop,
+		vdma_get_dma_residue,
+		vdma_mem_alloc,
+		vdma_dma_setup
+	}
+};
+
+
+static int FDC1 = 0x3f0; /* Lies.  Floppy controller is memory mapped, not io mapped */
+static int FDC2 = -1;
+
+#define FLOPPY0_TYPE	0
+#define FLOPPY1_TYPE	0
+
+#define N_FDC 1
+#define N_DRIVE 8
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __ASM_PARISC_FLOPPY_H */
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
new file mode 100644
index 000000000000..0c705c3a55ef
--- /dev/null
+++ b/arch/parisc/include/asm/futex.h
@@ -0,0 +1,77 @@
+#ifndef _ASM_PARISC_FUTEX_H
+#define _ASM_PARISC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+	case FUTEX_OP_ADD:
+	case FUTEX_OP_OR:
+	case FUTEX_OP_ANDN:
+	case FUTEX_OP_XOR:
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+/* Non-atomic version */
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+	int err = 0;
+	int uval;
+
+	/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
+	 * our gateway page, and causes no end of trouble...
+	 */
+	if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
+		return -EFAULT;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	err = get_user(uval, uaddr);
+	if (err) return -EFAULT;
+	if (uval == oldval)
+		err = put_user(newval, uaddr);
+	if (err) return -EFAULT;
+	return uval;
+}
+
+#endif /*__KERNEL__*/
+#endif /*_ASM_PARISC_FUTEX_H*/
diff --git a/arch/parisc/include/asm/grfioctl.h b/arch/parisc/include/asm/grfioctl.h
new file mode 100644
index 000000000000..671e06042b40
--- /dev/null
+++ b/arch/parisc/include/asm/grfioctl.h
@@ -0,0 +1,113 @@
+/*  Architecture specific parts of HP's STI (framebuffer) driver.
+ *  Structures are HP-UX compatible for XFree86 usage.
+ * 
+ *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
+ *    Copyright (C) 2001 Helge Deller (deller a parisc-linux org)
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __ASM_PARISC_GRFIOCTL_H
+#define __ASM_PARISC_GRFIOCTL_H
+
+/* upper 32 bits of graphics id (HP/UX identifier) */
+
+#define GRFGATOR		8
+#define S9000_ID_S300		9
+#define GRFBOBCAT		9
+#define	GRFCATSEYE		9
+#define S9000_ID_98720		10
+#define GRFRBOX			10
+#define S9000_ID_98550		11
+#define GRFFIREEYE		11
+#define S9000_ID_A1096A		12
+#define GRFHYPERION		12
+#define S9000_ID_FRI		13
+#define S9000_ID_98730		14
+#define GRFDAVINCI		14
+#define S9000_ID_98705		0x26C08070	/* Tigershark */
+#define S9000_ID_98736		0x26D148AB
+#define S9000_ID_A1659A		0x26D1482A	/* CRX 8 plane color (=ELK) */
+#define S9000_ID_ELK		S9000_ID_A1659A
+#define S9000_ID_A1439A		0x26D148EE	/* CRX24 = CRX+ (24-plane color) */
+#define S9000_ID_A1924A		0x26D1488C	/* GRX gray-scale */
+#define S9000_ID_ELM		S9000_ID_A1924A
+#define S9000_ID_98765		0x27480DEF
+#define S9000_ID_ELK_768	0x27482101
+#define S9000_ID_STINGER	0x27A4A402
+#define S9000_ID_TIMBER		0x27F12392	/* Bushmaster (710) Graphics */
+#define S9000_ID_TOMCAT		0x27FCCB6D	/* dual-headed ELK (Dual CRX) */
+#define S9000_ID_ARTIST		0x2B4DED6D	/* Artist (Gecko/712 & 715) onboard Graphics */
+#define S9000_ID_HCRX		0x2BCB015A	/* Hyperdrive/Hyperbowl (A4071A) Graphics */
+#define CRX24_OVERLAY_PLANES	0x920825AA	/* Overlay planes on CRX24 */
+
+#define CRT_ID_ELK_1024		S9000_ID_ELK_768 /* Elk 1024x768  CRX */
+#define CRT_ID_ELK_1280		S9000_ID_A1659A	/* Elk 1280x1024 CRX */
+#define CRT_ID_ELK_1024DB	0x27849CA5      /* Elk 1024x768 double buffer */
+#define CRT_ID_ELK_GS		S9000_ID_A1924A	/* Elk 1280x1024 GreyScale    */
+#define CRT_ID_CRX24		S9000_ID_A1439A	/* Piranha */
+#define CRT_ID_VISUALIZE_EG	0x2D08C0A7      /* Graffiti, A4450A (built-in B132+/B160L) */
+#define CRT_ID_THUNDER		0x2F23E5FC      /* Thunder 1 VISUALIZE 48*/
+#define CRT_ID_THUNDER2		0x2F8D570E      /* Thunder 2 VISUALIZE 48 XP*/
+#define CRT_ID_HCRX		S9000_ID_HCRX	/* Hyperdrive HCRX */
+#define CRT_ID_CRX48Z		S9000_ID_STINGER /* Stinger */
+#define CRT_ID_DUAL_CRX		S9000_ID_TOMCAT	/* Tomcat */
+#define CRT_ID_PVRX		S9000_ID_98705	/* Tigershark */
+#define CRT_ID_TIMBER		S9000_ID_TIMBER	/* Timber (710 builtin) */
+#define CRT_ID_TVRX		S9000_ID_98765	/* TVRX (gto/falcon) */
+#define CRT_ID_ARTIST		S9000_ID_ARTIST	/* Artist */
+#define CRT_ID_SUMMIT		0x2FC1066B      /* Summit FX2, FX4, FX6 ... */
+#define CRT_ID_LEGO		0x35ACDA30	/* Lego FX5, FX10 ... */
+#define CRT_ID_PINNACLE		0x35ACDA16	/* Pinnacle FXe */ 
+
+/* structure for ioctl(GCDESCRIBE) */
+
+#define gaddr_t unsigned long	/* FIXME: PA2.0 (64bit) portable ? */
+
+struct	grf_fbinfo {
+	unsigned int	id;		/* upper 32 bits of graphics id */
+	unsigned int	mapsize;	/* mapped size of framebuffer */
+	unsigned int	dwidth, dlength;/* x and y sizes */
+	unsigned int	width, length;	/* total x and total y size */
+	unsigned int	xlen;		/* x pitch size */
+	unsigned int	bpp, bppu;	/* bits per pixel and used bpp */
+	unsigned int	npl, nplbytes;	/* # of planes and bytes per plane */
+	char		name[32];	/* name of the device (from ROM) */
+	unsigned int	attr;		/* attributes */
+	gaddr_t 	fbbase, regbase;/* framebuffer and register base addr */
+	gaddr_t		regions[6];	/* region bases */
+};
+
+#define	GCID		_IOR('G', 0, int)
+#define	GCON		_IO('G', 1)
+#define	GCOFF		_IO('G', 2)
+#define	GCAON		_IO('G', 3)
+#define	GCAOFF		_IO('G', 4)
+#define	GCMAP		_IOWR('G', 5, int)
+#define	GCUNMAP		_IOWR('G', 6, int)
+#define	GCMAP_HPUX	_IO('G', 5)
+#define	GCUNMAP_HPUX	_IO('G', 6)
+#define	GCLOCK		_IO('G', 7)
+#define	GCUNLOCK	_IO('G', 8)
+#define	GCLOCK_MINIMUM	_IO('G', 9)
+#define	GCUNLOCK_MINIMUM _IO('G', 10)
+#define	GCSTATIC_CMAP	_IO('G', 11)
+#define	GCVARIABLE_CMAP _IO('G', 12)
+#define GCTERM		_IOWR('G',20,int)	/* multi-headed Tomcat */ 
+#define GCDESCRIBE	_IOR('G', 21, struct grf_fbinfo)
+#define GCFASTLOCK	_IO('G', 26)
+
+#endif /* __ASM_PARISC_GRFIOCTL_H */
+
diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h
new file mode 100644
index 000000000000..ce93133d5112
--- /dev/null
+++ b/arch/parisc/include/asm/hardirq.h
@@ -0,0 +1,29 @@
+/* hardirq.h: PA-RISC hard IRQ support.
+ *
+ * Copyright (C) 2001 Matthew Wilcox <matthew@wil.cx>
+ *
+ * The locking is really quite interesting.  There's a cpu-local
+ * count of how many interrupts are being handled, and a global
+ * lock.  An interrupt can only be serviced if the global lock
+ * is free.  You can't be sure no more interrupts are being
+ * serviced until you've acquired the lock and then checked
+ * all the per-cpu interrupt counts are all zero.  It's a specialised
+ * br_lock, and that's exactly how Sparc does it.  We don't because
+ * it's more locking for us.  This way is lock-free in the interrupt path.
+ */
+
+#ifndef _PARISC_HARDIRQ_H
+#define _PARISC_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned long __softirq_pending; /* set_bit is used on this */
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+void ack_bad_irq(unsigned int irq);
+
+#endif /* _PARISC_HARDIRQ_H */
diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h
new file mode 100644
index 000000000000..4e9626836bab
--- /dev/null
+++ b/arch/parisc/include/asm/hardware.h
@@ -0,0 +1,127 @@
+#ifndef _PARISC_HARDWARE_H
+#define _PARISC_HARDWARE_H
+
+#include <linux/mod_devicetable.h>
+#include <asm/pdc.h>
+
+#define HWTYPE_ANY_ID		PA_HWTYPE_ANY_ID
+#define HVERSION_ANY_ID		PA_HVERSION_ANY_ID
+#define HVERSION_REV_ANY_ID	PA_HVERSION_REV_ANY_ID
+#define SVERSION_ANY_ID		PA_SVERSION_ANY_ID
+
+struct hp_hardware {
+	unsigned short	hw_type:5;	/* HPHW_xxx */
+	unsigned short	hversion;
+	unsigned long	sversion:28;
+	unsigned short	opt;
+	const char	name[80];	/* The hardware description */
+};
+
+struct parisc_device;
+
+enum cpu_type {
+	pcx	= 0, /* pa7000		pa 1.0  */
+	pcxs	= 1, /* pa7000		pa 1.1a */
+	pcxt	= 2, /* pa7100		pa 1.1b */
+	pcxt_	= 3, /* pa7200	(t')	pa 1.1c */
+	pcxl	= 4, /* pa7100lc	pa 1.1d */
+	pcxl2	= 5, /* pa7300lc	pa 1.1e */
+	pcxu	= 6, /* pa8000		pa 2.0  */
+	pcxu_	= 7, /* pa8200	(u+)	pa 2.0  */
+	pcxw	= 8, /* pa8500		pa 2.0  */
+	pcxw_	= 9, /* pa8600	(w+)	pa 2.0  */
+	pcxw2	= 10, /* pa8700		pa 2.0  */
+	mako	= 11, /* pa8800		pa 2.0  */
+	mako2	= 12  /* pa8900		pa 2.0  */
+};
+
+extern const char * const cpu_name_version[][2]; /* mapping from enum cpu_type to strings */
+
+struct parisc_driver;
+
+struct io_module {
+        volatile uint32_t nothing;		/* reg 0 */
+        volatile uint32_t io_eim;
+        volatile uint32_t io_dc_adata;
+        volatile uint32_t io_ii_cdata;
+        volatile uint32_t io_dma_link;		/* reg 4 */
+        volatile uint32_t io_dma_command;
+        volatile uint32_t io_dma_address;
+        volatile uint32_t io_dma_count;
+        volatile uint32_t io_flex;		/* reg 8 */
+        volatile uint32_t io_spa_address;
+        volatile uint32_t reserved1[2];
+        volatile uint32_t io_command;		/* reg 12 */
+        volatile uint32_t io_status;
+        volatile uint32_t io_control;
+        volatile uint32_t io_data;
+        volatile uint32_t reserved2;		/* reg 16 */
+        volatile uint32_t chain_addr;
+        volatile uint32_t sub_mask_clr;
+        volatile uint32_t reserved3[13];
+        volatile uint32_t undefined[480];
+        volatile uint32_t unpriv[512];
+};
+
+struct bc_module {
+        volatile uint32_t unused1[12];
+        volatile uint32_t io_command;
+        volatile uint32_t io_status;
+        volatile uint32_t io_control;
+        volatile uint32_t unused2[1];
+        volatile uint32_t io_err_resp;
+        volatile uint32_t io_err_info;
+        volatile uint32_t io_err_req;
+        volatile uint32_t unused3[11];
+        volatile uint32_t io_io_low;
+        volatile uint32_t io_io_high;
+};
+
+#define HPHW_NPROC     0 
+#define HPHW_MEMORY    1       
+#define HPHW_B_DMA     2
+#define HPHW_OBSOLETE  3
+#define HPHW_A_DMA     4
+#define HPHW_A_DIRECT  5
+#define HPHW_OTHER     6
+#define HPHW_BCPORT    7
+#define HPHW_CIO       8
+#define HPHW_CONSOLE   9
+#define HPHW_FIO       10
+#define HPHW_BA        11
+#define HPHW_IOA       12
+#define HPHW_BRIDGE    13
+#define HPHW_FABRIC    14
+#define HPHW_MC	       15
+#define HPHW_FAULTY    31
+
+
+/* hardware.c: */
+extern const char *parisc_hardware_description(struct parisc_device_id *id);
+extern enum cpu_type parisc_get_cpu_type(unsigned long hversion);
+
+struct pci_dev;
+
+/* drivers.c: */
+extern struct parisc_device *alloc_pa_dev(unsigned long hpa,
+		struct hardware_path *path);
+extern int register_parisc_device(struct parisc_device *dev);
+extern int register_parisc_driver(struct parisc_driver *driver);
+extern int count_parisc_driver(struct parisc_driver *driver);
+extern int unregister_parisc_driver(struct parisc_driver *driver);
+extern void walk_central_bus(void);
+extern const struct parisc_device *find_pa_parent_type(const struct parisc_device *, int);
+extern void print_parisc_devices(void);
+extern char *print_pa_hwpath(struct parisc_device *dev, char *path);
+extern char *print_pci_hwpath(struct pci_dev *dev, char *path);
+extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
+extern void init_parisc_bus(void);
+extern struct device *hwpath_to_device(struct hardware_path *modpath);
+extern void device_to_hwpath(struct device *dev, struct hardware_path *path);
+
+
+/* inventory.c: */
+extern void do_memory_inventory(void);
+extern void do_device_inventory(void);
+
+#endif /* _PARISC_HARDWARE_H */
diff --git a/arch/parisc/include/asm/hw_irq.h b/arch/parisc/include/asm/hw_irq.h
new file mode 100644
index 000000000000..6707f7df3921
--- /dev/null
+++ b/arch/parisc/include/asm/hw_irq.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *	linux/include/asm/hw_irq.h
+ */
+
+#endif
diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h
new file mode 100644
index 000000000000..c246ef75017d
--- /dev/null
+++ b/arch/parisc/include/asm/ide.h
@@ -0,0 +1,61 @@
+/*
+ *  linux/include/asm-parisc/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the PARISC architecture specific IDE code.
+ */
+
+#ifndef __ASM_PARISC_IDE_H
+#define __ASM_PARISC_IDE_H
+
+#ifdef __KERNEL__
+
+#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
+#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
+#define ide_release_region(from,extent)		release_region((from), (extent))
+/* Generic I/O and MEMIO string operations.  */
+
+#define __ide_insw	insw
+#define __ide_insl	insl
+#define __ide_outsw	outsw
+#define __ide_outsl	outsl
+
+static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
+{
+	while (count--) {
+		*(u16 *)addr = __raw_readw(port);
+		addr += 2;
+	}
+}
+
+static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
+{
+	while (count--) {
+		*(u32 *)addr = __raw_readl(port);
+		addr += 4;
+	}
+}
+
+static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
+{
+	while (count--) {
+		__raw_writew(*(u16 *)addr, port);
+		addr += 2;
+	}
+}
+
+static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
+{
+	while (count--) {
+		__raw_writel(*(u32 *)addr, port);
+		addr += 4;
+	}
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_PARISC_IDE_H */
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
new file mode 100644
index 000000000000..55ddb1842107
--- /dev/null
+++ b/arch/parisc/include/asm/io.h
@@ -0,0 +1,293 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <linux/types.h>
+#include <asm/pgtable.h>
+
+extern unsigned long parisc_vmerge_boundary;
+extern unsigned long parisc_vmerge_max_size;
+
+#define BIO_VMERGE_BOUNDARY	parisc_vmerge_boundary
+#define BIO_VMERGE_MAX_SIZE	parisc_vmerge_max_size
+
+#define virt_to_phys(a) ((unsigned long)__pa(a))
+#define phys_to_virt(a) __va(a)
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+
+static inline unsigned long isa_bus_to_virt(unsigned long addr) {
+	BUG();
+	return 0;
+}
+
+static inline unsigned long isa_virt_to_bus(void *addr) {
+	BUG();
+	return 0;
+}
+
+/*
+ * Memory mapped I/O
+ *
+ * readX()/writeX() do byteswapping and take an ioremapped address
+ * __raw_readX()/__raw_writeX() don't byteswap and take an ioremapped address.
+ * gsc_*() don't byteswap and operate on physical addresses;
+ *   eg dev->hpa or 0xfee00000.
+ */
+
+static inline unsigned char gsc_readb(unsigned long addr)
+{
+	long flags;
+	unsigned char ret;
+
+	__asm__ __volatile__(
+	"	rsm	2,%0\n"
+	"	ldbx	0(%2),%1\n"
+	"	mtsm	%0\n"
+	: "=&r" (flags), "=r" (ret) : "r" (addr) );
+
+	return ret;
+}
+
+static inline unsigned short gsc_readw(unsigned long addr)
+{
+	long flags;
+	unsigned short ret;
+
+	__asm__ __volatile__(
+	"	rsm	2,%0\n"
+	"	ldhx	0(%2),%1\n"
+	"	mtsm	%0\n"
+	: "=&r" (flags), "=r" (ret) : "r" (addr) );
+
+	return ret;
+}
+
+static inline unsigned int gsc_readl(unsigned long addr)
+{
+	u32 ret;
+
+	__asm__ __volatile__(
+	"	ldwax	0(%1),%0\n"
+	: "=r" (ret) : "r" (addr) );
+
+	return ret;
+}
+
+static inline unsigned long long gsc_readq(unsigned long addr)
+{
+	unsigned long long ret;
+
+#ifdef CONFIG_64BIT
+	__asm__ __volatile__(
+	"	ldda	0(%1),%0\n"
+	:  "=r" (ret) : "r" (addr) );
+#else
+	/* two reads may have side effects.. */
+	ret = ((u64) gsc_readl(addr)) << 32;
+	ret |= gsc_readl(addr+4);
+#endif
+	return ret;
+}
+
+static inline void gsc_writeb(unsigned char val, unsigned long addr)
+{
+	long flags;
+	__asm__ __volatile__(
+	"	rsm	2,%0\n"
+	"	stbs	%1,0(%2)\n"
+	"	mtsm	%0\n"
+	: "=&r" (flags) :  "r" (val), "r" (addr) );
+}
+
+static inline void gsc_writew(unsigned short val, unsigned long addr)
+{
+	long flags;
+	__asm__ __volatile__(
+	"	rsm	2,%0\n"
+	"	sths	%1,0(%2)\n"
+	"	mtsm	%0\n"
+	: "=&r" (flags) :  "r" (val), "r" (addr) );
+}
+
+static inline void gsc_writel(unsigned int val, unsigned long addr)
+{
+	__asm__ __volatile__(
+	"	stwas	%0,0(%1)\n"
+	: :  "r" (val), "r" (addr) );
+}
+
+static inline void gsc_writeq(unsigned long long val, unsigned long addr)
+{
+#ifdef CONFIG_64BIT
+	__asm__ __volatile__(
+	"	stda	%0,0(%1)\n"
+	: :  "r" (val), "r" (addr) );
+#else
+	/* two writes may have side effects.. */
+	gsc_writel(val >> 32, addr);
+	gsc_writel(val, addr+4);
+#endif
+}
+
+/*
+ * The standard PCI ioremap interfaces
+ */
+
+extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+/* Most machines react poorly to I/O-space being cacheable... Instead let's
+ * define ioremap() in terms of ioremap_nocache().
+ */
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+	return __ioremap(offset, size, _PAGE_NO_CACHE);
+}
+#define ioremap_nocache(off, sz)	ioremap((off), (sz))
+
+extern void iounmap(const volatile void __iomem *addr);
+
+static inline unsigned char __raw_readb(const volatile void __iomem *addr)
+{
+	return (*(volatile unsigned char __force *) (addr));
+}
+static inline unsigned short __raw_readw(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *) addr;
+}
+static inline unsigned int __raw_readl(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *) addr;
+}
+static inline unsigned long long __raw_readq(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long long __force *) addr;
+}
+
+static inline void __raw_writeb(unsigned char b, volatile void __iomem *addr)
+{
+	*(volatile unsigned char __force *) addr = b;
+}
+static inline void __raw_writew(unsigned short b, volatile void __iomem *addr)
+{
+	*(volatile unsigned short __force *) addr = b;
+}
+static inline void __raw_writel(unsigned int b, volatile void __iomem *addr)
+{
+	*(volatile unsigned int __force *) addr = b;
+}
+static inline void __raw_writeq(unsigned long long b, volatile void __iomem *addr)
+{
+	*(volatile unsigned long long __force *) addr = b;
+}
+
+/* readb can never be const, so use __fswab instead of le*_to_cpu */
+#define readb(addr) __raw_readb(addr)
+#define readw(addr) __fswab16(__raw_readw(addr))
+#define readl(addr) __fswab32(__raw_readl(addr))
+#define readq(addr) __fswab64(__raw_readq(addr))
+#define writeb(b, addr) __raw_writeb(b, addr)
+#define writew(b, addr) __raw_writew(cpu_to_le16(b), addr)
+#define writel(b, addr) __raw_writel(cpu_to_le32(b), addr)
+#define writeq(b, addr) __raw_writeq(cpu_to_le64(b), addr)
+
+#define readb_relaxed(addr) readb(addr)
+#define readw_relaxed(addr) readw(addr)
+#define readl_relaxed(addr) readl(addr)
+#define readq_relaxed(addr) readq(addr)
+
+#define mmiowb() do { } while (0)
+
+void memset_io(volatile void __iomem *addr, unsigned char val, int count);
+void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
+void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
+
+/* Port-space IO */
+
+#define inb_p inb
+#define inw_p inw
+#define inl_p inl
+#define outb_p outb
+#define outw_p outw
+#define outl_p outl
+
+extern unsigned char eisa_in8(unsigned short port);
+extern unsigned short eisa_in16(unsigned short port);
+extern unsigned int eisa_in32(unsigned short port);
+extern void eisa_out8(unsigned char data, unsigned short port);
+extern void eisa_out16(unsigned short data, unsigned short port);
+extern void eisa_out32(unsigned int data, unsigned short port);
+
+#if defined(CONFIG_PCI)
+extern unsigned char inb(int addr);
+extern unsigned short inw(int addr);
+extern unsigned int inl(int addr);
+
+extern void outb(unsigned char b, int addr);
+extern void outw(unsigned short b, int addr);
+extern void outl(unsigned int b, int addr);
+#elif defined(CONFIG_EISA)
+#define inb eisa_in8
+#define inw eisa_in16
+#define inl eisa_in32
+#define outb eisa_out8
+#define outw eisa_out16
+#define outl eisa_out32
+#else
+static inline char inb(unsigned long addr)
+{
+	BUG();
+	return -1;
+}
+
+static inline short inw(unsigned long addr)
+{
+	BUG();
+	return -1;
+}
+
+static inline int inl(unsigned long addr)
+{
+	BUG();
+	return -1;
+}
+
+#define outb(x, y)	BUG()
+#define outw(x, y)	BUG()
+#define outl(x, y)	BUG()
+#endif
+
+/*
+ * String versions of in/out ops:
+ */
+extern void insb (unsigned long port, void *dst, unsigned long count);
+extern void insw (unsigned long port, void *dst, unsigned long count);
+extern void insl (unsigned long port, void *dst, unsigned long count);
+extern void outsb (unsigned long port, const void *src, unsigned long count);
+extern void outsw (unsigned long port, const void *src, unsigned long count);
+extern void outsl (unsigned long port, const void *src, unsigned long count);
+
+
+/* IO Port space is :      BBiiii   where BB is HBA number. */
+#define IO_SPACE_LIMIT 0x00ffffff
+
+/* PA machines have an MM I/O space from 0xf0000000-0xffffffff in 32
+ * bit mode and from 0xfffffffff0000000-0xfffffffffffffff in 64 bit
+ * mode (essentially just sign extending.  This macro takes in a 32
+ * bit I/O address (still with the leading f) and outputs the correct
+ * value for either 32 or 64 bit mode */
+#define F_EXTEND(x) ((unsigned long)((x) | (0xffffffff00000000ULL)))
+
+#include <asm-generic/iomap.h>
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)	p
+
+#endif
diff --git a/arch/parisc/include/asm/ioctl.h b/arch/parisc/include/asm/ioctl.h
new file mode 100644
index 000000000000..ec8efa02beda
--- /dev/null
+++ b/arch/parisc/include/asm/ioctl.h
@@ -0,0 +1,44 @@
+/*
+ *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
+ *    Copyright (C) 1999,2003 Matthew Wilcox < willy at debian . org >
+ *    portions from "linux/ioctl.h for Linux" by H.H. Bergman.
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef _ASM_PARISC_IOCTL_H
+#define _ASM_PARISC_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE	0U
+#define _IOC_WRITE	2U
+#define _IOC_READ	1U
+
+#include <asm-generic/ioctl.h>
+
+#endif /* _ASM_PARISC_IOCTL_H */
diff --git a/arch/parisc/include/asm/ioctls.h b/arch/parisc/include/asm/ioctls.h
new file mode 100644
index 000000000000..6747fad07a3e
--- /dev/null
+++ b/arch/parisc/include/asm/ioctls.h
@@ -0,0 +1,90 @@
+#ifndef __ARCH_PARISC_IOCTLS_H__
+#define __ARCH_PARISC_IOCTLS_H__
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS		_IOR('T', 16, struct termios) /* TCGETATTR */
+#define TCSETS		_IOW('T', 17, struct termios) /* TCSETATTR */
+#define TCSETSW		_IOW('T', 18, struct termios) /* TCSETATTRD */
+#define TCSETSF		_IOW('T', 19, struct termios) /* TCSETATTRF */
+#define TCGETA		_IOR('T', 1, struct termio)
+#define TCSETA		_IOW('T', 2, struct termio)
+#define TCSETAW		_IOW('T', 3, struct termio)
+#define TCSETAF		_IOW('T', 4, struct termio)
+#define TCSBRK		_IO('T', 5)
+#define TCXONC		_IO('T', 6)
+#define TCFLSH		_IO('T', 7)
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+#define TIOCGPGRP	_IOR('T', 30, int)
+#define TIOCSPGRP	_IOW('T', 29, int)
+#define TIOCOUTQ	0x5411
+#define TIOCSTI		0x5412
+#define TIOCGWINSZ	0x5413
+#define TIOCSWINSZ	0x5414
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define FIONREAD	0x541B
+#define TIOCINQ		FIONREAD
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+#define FIONBIO		0x5421
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	_IOR('T', 20, int) /* Return the session ID of FD */
+#define TCGETS2		_IOR('T',0x2A, struct termios2)
+#define TCSETS2		_IOW('T',0x2B, struct termios2)
+#define TCSETSW2	_IOW('T',0x2C, struct termios2)
+#define TCSETSF2	_IOW('T',0x2D, struct termios2)
+#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
+
+#define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
+#define FIOCLEX		0x5451
+#define FIOASYNC	0x5452
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460	/* Get exact space used by quota */
+
+#define TIOCSTART	0x5461
+#define TIOCSTOP	0x5462
+#define TIOCSLTC	0x5462
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+
+#endif /* _ASM_PARISC_IOCTLS_H */
diff --git a/arch/parisc/include/asm/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h
new file mode 100644
index 000000000000..bd956c425785
--- /dev/null
+++ b/arch/parisc/include/asm/ipcbuf.h
@@ -0,0 +1,27 @@
+#ifndef __PARISC_IPCBUF_H__
+#define __PARISC_IPCBUF_H__
+
+/*
+ * The ipc64_perm structure for PA-RISC is almost identical to
+ * kern_ipc_perm as we have always had 32-bit UIDs and GIDs in the kernel.
+ * 'seq' has been changed from long to int so that it's the same size
+ * on 64-bit kernels as on 32-bit ones.
+ */
+
+struct ipc64_perm
+{
+	key_t           key;
+	uid_t           uid;
+	gid_t           gid;
+	uid_t           cuid;
+	gid_t           cgid;
+	unsigned short int	__pad1;
+	mode_t          mode;
+	unsigned short int	__pad2;
+	unsigned short int	seq;
+	unsigned int	__pad3;
+	unsigned long long int __unused1;
+	unsigned long long int __unused2;
+};
+
+#endif /* __PARISC_IPCBUF_H__ */
diff --git a/arch/parisc/include/asm/irq.h b/arch/parisc/include/asm/irq.h
new file mode 100644
index 000000000000..399c81981ed5
--- /dev/null
+++ b/arch/parisc/include/asm/irq.h
@@ -0,0 +1,57 @@
+/*
+ * include/asm-parisc/irq.h
+ *
+ * Copyright 2005 Matthew Wilcox <matthew@wil.cx>
+ */
+
+#ifndef _ASM_PARISC_IRQ_H
+#define _ASM_PARISC_IRQ_H
+
+#include <linux/cpumask.h>
+#include <asm/types.h>
+
+#define NO_IRQ		(-1)
+
+#ifdef CONFIG_GSC
+#define GSC_IRQ_BASE	16
+#define GSC_IRQ_MAX	63
+#define CPU_IRQ_BASE	64
+#else
+#define CPU_IRQ_BASE	16
+#endif
+
+#define TIMER_IRQ	(CPU_IRQ_BASE + 0)
+#define	IPI_IRQ		(CPU_IRQ_BASE + 1)
+#define CPU_IRQ_MAX	(CPU_IRQ_BASE + (BITS_PER_LONG - 1))
+
+#define NR_IRQS		(CPU_IRQ_MAX + 1)
+
+static __inline__ int irq_canonicalize(int irq)
+{
+	return (irq == 2) ? 9 : irq;
+}
+
+struct irq_chip;
+
+/*
+ * Some useful "we don't have to do anything here" handlers.  Should
+ * probably be provided by the generic code.
+ */
+void no_ack_irq(unsigned int irq);
+void no_end_irq(unsigned int irq);
+void cpu_ack_irq(unsigned int irq);
+void cpu_end_irq(unsigned int irq);
+
+extern int txn_alloc_irq(unsigned int nbits);
+extern int txn_claim_irq(int);
+extern unsigned int txn_alloc_data(unsigned int);
+extern unsigned long txn_alloc_addr(unsigned int);
+extern unsigned long txn_affinity_addr(unsigned int irq, int cpu);
+
+extern int cpu_claim_irq(unsigned int irq, struct irq_chip *, void *);
+extern int cpu_check_affinity(unsigned int irq, cpumask_t *dest);
+
+/* soft power switch support (power.c) */
+extern struct tasklet_struct power_tasklet;
+
+#endif	/* _ASM_PARISC_IRQ_H */
diff --git a/arch/parisc/include/asm/irq_regs.h b/arch/parisc/include/asm/irq_regs.h
new file mode 100644
index 000000000000..3dd9c0b70270
--- /dev/null
+++ b/arch/parisc/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/parisc/include/asm/kdebug.h b/arch/parisc/include/asm/kdebug.h
new file mode 100644
index 000000000000..6ece1b037665
--- /dev/null
+++ b/arch/parisc/include/asm/kdebug.h
@@ -0,0 +1 @@
+#include <asm-generic/kdebug.h>
diff --git a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
new file mode 100644
index 000000000000..806aae3c5338
--- /dev/null
+++ b/arch/parisc/include/asm/kmap_types.h
@@ -0,0 +1,30 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0)	KM_BOUNCE_READ,
+D(1)	KM_SKB_SUNRPC_DATA,
+D(2)	KM_SKB_DATA_SOFTIRQ,
+D(3)	KM_USER0,
+D(4)	KM_USER1,
+D(5)	KM_BIO_SRC_IRQ,
+D(6)	KM_BIO_DST_IRQ,
+D(7)	KM_PTE0,
+D(8)	KM_PTE1,
+D(9)	KM_IRQ0,
+D(10)	KM_IRQ1,
+D(11)	KM_SOFTIRQ0,
+D(12)	KM_SOFTIRQ1,
+D(13)	KM_TYPE_NR
+};
+
+#undef D
+
+#endif
diff --git a/arch/parisc/include/asm/led.h b/arch/parisc/include/asm/led.h
new file mode 100644
index 000000000000..c3405ab9d60a
--- /dev/null
+++ b/arch/parisc/include/asm/led.h
@@ -0,0 +1,42 @@
+#ifndef LED_H
+#define LED_H
+
+#define	LED7		0x80		/* top (or furthest right) LED */
+#define	LED6		0x40
+#define	LED5		0x20
+#define	LED4		0x10
+#define	LED3		0x08
+#define	LED2		0x04
+#define	LED1		0x02
+#define	LED0		0x01		/* bottom (or furthest left) LED */
+
+#define	LED_LAN_TX	LED0		/* for LAN transmit activity */
+#define	LED_LAN_RCV	LED1		/* for LAN receive activity */
+#define	LED_DISK_IO	LED2		/* for disk activity */
+#define	LED_HEARTBEAT	LED3		/* heartbeat */
+
+/* values for pdc_chassis_lcd_info_ret_block.model: */
+#define DISPLAY_MODEL_LCD  0		/* KittyHawk LED or LCD */
+#define DISPLAY_MODEL_NONE 1		/* no LED or LCD */
+#define DISPLAY_MODEL_LASI 2		/* LASI style 8 bit LED */
+#define DISPLAY_MODEL_OLD_ASP 0x7F	/* faked: ASP style 8 x 1 bit LED (only very old ASP versions) */
+
+#define LED_CMD_REG_NONE 0		/* NULL == no addr for the cmd register */
+
+/* register_led_driver() */
+int __init register_led_driver(int model, unsigned long cmd_reg, unsigned long data_reg);
+
+/* registers the LED regions for procfs */
+void __init register_led_regions(void);
+
+#ifdef CONFIG_CHASSIS_LCD_LED
+/* writes a string to the LCD display (if possible on this h/w) */
+int lcd_print(const char *str);
+#else
+#define lcd_print(str)
+#endif
+
+/* main LED initialization function (uses PDC) */ 
+int __init led_init(void);
+
+#endif /* LED_H */
diff --git a/arch/parisc/include/asm/linkage.h b/arch/parisc/include/asm/linkage.h
new file mode 100644
index 000000000000..0b19a7242d0c
--- /dev/null
+++ b/arch/parisc/include/asm/linkage.h
@@ -0,0 +1,31 @@
+#ifndef __ASM_PARISC_LINKAGE_H
+#define __ASM_PARISC_LINKAGE_H
+
+#ifndef __ALIGN
+#define __ALIGN         .align 4
+#define __ALIGN_STR     ".align 4"
+#endif
+
+/*
+ * In parisc assembly a semicolon marks a comment while a
+ * exclamation mark is used to separate independent lines.
+ */
+#ifdef __ASSEMBLY__
+
+#define ENTRY(name) \
+	.export name !\
+	ALIGN !\
+name:
+
+#ifdef CONFIG_64BIT
+#define ENDPROC(name) \
+	END(name)
+#else
+#define ENDPROC(name) \
+	.type name, @function !\
+	END(name)
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif  /* __ASM_PARISC_LINKAGE_H */
diff --git a/arch/parisc/include/asm/local.h b/arch/parisc/include/asm/local.h
new file mode 100644
index 000000000000..c11c530f74d0
--- /dev/null
+++ b/arch/parisc/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/parisc/include/asm/machdep.h b/arch/parisc/include/asm/machdep.h
new file mode 100644
index 000000000000..a231c97d703e
--- /dev/null
+++ b/arch/parisc/include/asm/machdep.h
@@ -0,0 +1,16 @@
+#ifndef _PARISC_MACHDEP_H
+#define _PARISC_MACHDEP_H
+
+#include <linux/notifier.h>
+
+#define	MACH_RESTART	1
+#define	MACH_HALT	2
+#define MACH_POWER_ON	3
+#define	MACH_POWER_OFF	4
+
+extern struct notifier_block *mach_notifier;
+extern void pa7300lc_init(void);
+
+extern void (*cpu_lpmc)(int, struct pt_regs *);
+
+#endif
diff --git a/arch/parisc/include/asm/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
new file mode 100644
index 000000000000..adf41631449f
--- /dev/null
+++ b/arch/parisc/include/asm/mc146818rtc.h
@@ -0,0 +1,9 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+/* empty include file to satisfy the include in genrtc.c */
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/parisc/include/asm/mckinley.h b/arch/parisc/include/asm/mckinley.h
new file mode 100644
index 000000000000..d1ea6f12915e
--- /dev/null
+++ b/arch/parisc/include/asm/mckinley.h
@@ -0,0 +1,9 @@
+#ifndef ASM_PARISC_MCKINLEY_H
+#define ASM_PARISC_MCKINLEY_H
+#ifdef __KERNEL__
+
+/* declared in arch/parisc/kernel/setup.c */
+extern struct proc_dir_entry * proc_mckinley_root;
+
+#endif /*__KERNEL__*/
+#endif /*ASM_PARISC_MCKINLEY_H*/
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
new file mode 100644
index 000000000000..defe752cc996
--- /dev/null
+++ b/arch/parisc/include/asm/mman.h
@@ -0,0 +1,61 @@
+#ifndef __PARISC_MMAN_H__
+#define __PARISC_MMAN_H__
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_TYPE	0x03		/* Mask for type of mapping */
+#define MAP_FIXED	0x04		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x10		/* don't use a file */
+
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_GROWSDOWN	0x8000		/* stack-like segment */
+#define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x20000		/* do not block on IO */
+
+#define MS_SYNC		1		/* synchronous memory sync */
+#define MS_ASYNC	2		/* sync memory asynchronously */
+#define MS_INVALIDATE	4		/* invalidate the caches */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+#define MADV_NORMAL     0               /* no further special treatment */
+#define MADV_RANDOM     1               /* expect random page references */
+#define MADV_SEQUENTIAL 2               /* expect sequential page references */
+#define MADV_WILLNEED   3               /* will need these pages */
+#define MADV_DONTNEED   4               /* don't need these pages */
+#define MADV_SPACEAVAIL 5               /* insure that resources are reserved */
+#define MADV_VPS_PURGE  6               /* Purge pages from VM page cache */
+#define MADV_VPS_INHERIT 7              /* Inherit parents page size */
+
+/* common/generic parameters */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+
+/* The range 12-64 is reserved for page size specification. */
+#define MADV_4K_PAGES   12              /* Use 4K pages  */
+#define MADV_16K_PAGES  14              /* Use 16K pages */
+#define MADV_64K_PAGES  16              /* Use 64K pages */
+#define MADV_256K_PAGES 18              /* Use 256K pages */
+#define MADV_1M_PAGES   20              /* Use 1 Megabyte pages */
+#define MADV_4M_PAGES   22              /* Use 4 Megabyte pages */
+#define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
+#define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
+
+/* compatibility flags */
+#define MAP_FILE	0
+#define MAP_VARIABLE	0
+
+#endif /* __PARISC_MMAN_H__ */
diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h
new file mode 100644
index 000000000000..6a310cf8b734
--- /dev/null
+++ b/arch/parisc/include/asm/mmu.h
@@ -0,0 +1,7 @@
+#ifndef _PARISC_MMU_H_
+#define _PARISC_MMU_H_
+
+/* On parisc, we store the space id here */
+typedef unsigned long mm_context_t;
+
+#endif /* _PARISC_MMU_H_ */
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
new file mode 100644
index 000000000000..85856c74ad1d
--- /dev/null
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -0,0 +1,75 @@
+#ifndef __PARISC_MMU_CONTEXT_H
+#define __PARISC_MMU_CONTEXT_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm-generic/mm_hooks.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/* on PA-RISC, we actually have enough contexts to justify an allocator
+ * for them.  prumpf */
+
+extern unsigned long alloc_sid(void);
+extern void free_sid(unsigned long);
+
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	BUG_ON(atomic_read(&mm->mm_users) != 1);
+
+	mm->context = alloc_sid();
+	return 0;
+}
+
+static inline void
+destroy_context(struct mm_struct *mm)
+{
+	free_sid(mm->context);
+	mm->context = 0;
+}
+
+static inline void load_context(mm_context_t context)
+{
+	mtsp(context, 3);
+#if SPACEID_SHIFT == 0
+	mtctl(context << 1,8);
+#else
+	mtctl(context >> (SPACEID_SHIFT - 1),8);
+#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+
+	if (prev != next) {
+		mtctl(__pa(next->pgd), 25);
+		load_context(next->context);
+	}
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Activate_mm is our one chance to allocate a space id
+	 * for a new mm created in the exec path. There's also
+	 * some lazy tlb stuff, which is currently dead code, but
+	 * we only allocate a space id if one hasn't been allocated
+	 * already, so we should be OK.
+	 */
+
+	BUG_ON(next == &init_mm); /* Should never happen */
+
+	if (next->context == 0)
+	    next->context = alloc_sid();
+
+	switch_mm(prev,next,current);
+}
+#endif
diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h
new file mode 100644
index 000000000000..9608d2cf214a
--- /dev/null
+++ b/arch/parisc/include/asm/mmzone.h
@@ -0,0 +1,73 @@
+#ifndef _PARISC_MMZONE_H
+#define _PARISC_MMZONE_H
+
+#ifdef CONFIG_DISCONTIGMEM
+
+#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
+extern int npmem_ranges;
+
+struct node_map_data {
+    pg_data_t pg_data;
+};
+
+extern struct node_map_data node_data[];
+
+#define NODE_DATA(nid)          (&node_data[nid].pg_data)
+
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid)						\
+({									\
+	pg_data_t *__pgdat = NODE_DATA(nid);				\
+	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
+})
+
+/* We have these possible memory map layouts:
+ * Astro: 0-3.75, 67.75-68, 4-64
+ * zx1: 0-1, 257-260, 4-256
+ * Stretch (N-class): 0-2, 4-32, 34-xxx
+ */
+
+/* Since each 1GB can only belong to one region (node), we can create
+ * an index table for pfn to nid lookup; each entry in pfnnid_map 
+ * represents 1GB, and contains the node that the memory belongs to. */
+
+#define PFNNID_SHIFT (30 - PAGE_SHIFT)
+#define PFNNID_MAP_MAX  512     /* support 512GB */
+extern unsigned char pfnnid_map[PFNNID_MAP_MAX];
+
+#ifndef CONFIG_64BIT
+#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
+#else
+/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
+#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
+#endif
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	unsigned int i;
+	unsigned char r;
+
+	if (unlikely(pfn_is_io(pfn)))
+		return 0;
+
+	i = pfn >> PFNNID_SHIFT;
+	BUG_ON(i >= sizeof(pfnnid_map) / sizeof(pfnnid_map[0]));
+	r = pfnnid_map[i];
+	BUG_ON(r == 0xff);
+
+	return (int)r;
+}
+
+static inline int pfn_valid(int pfn)
+{
+	int nid = pfn_to_nid(pfn);
+
+	if (nid >= 0)
+		return (pfn < node_end_pfn(nid));
+	return 0;
+}
+
+#else /* !CONFIG_DISCONTIGMEM */
+#define MAX_PHYSMEM_RANGES 	1 
+#endif
+#endif /* _PARISC_MMZONE_H */
diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h
new file mode 100644
index 000000000000..c2cb49e934c1
--- /dev/null
+++ b/arch/parisc/include/asm/module.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_PARISC_MODULE_H
+#define _ASM_PARISC_MODULE_H
+/*
+ * This file contains the parisc architecture specific module code.
+ */
+#ifdef CONFIG_64BIT
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Addr Elf64_Addr
+#define Elf_Rela Elf64_Rela
+#else
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Addr Elf32_Addr
+#define Elf_Rela Elf32_Rela
+#endif
+
+struct unwind_table;
+
+struct mod_arch_specific
+{
+	unsigned long got_offset, got_count, got_max;
+	unsigned long fdesc_offset, fdesc_count, fdesc_max;
+	unsigned long stub_offset, stub_count, stub_max;
+	unsigned long init_stub_offset, init_stub_count, init_stub_max;
+	int unwind_section;
+	struct unwind_table *unwind;
+};
+
+#endif /* _ASM_PARISC_MODULE_H */
diff --git a/arch/parisc/include/asm/msgbuf.h b/arch/parisc/include/asm/msgbuf.h
new file mode 100644
index 000000000000..fe88f2649418
--- /dev/null
+++ b/arch/parisc/include/asm/msgbuf.h
@@ -0,0 +1,37 @@
+#ifndef _PARISC_MSGBUF_H
+#define _PARISC_MSGBUF_H
+
+/* 
+ * The msqid64_ds structure for parisc architecture, copied from sparc.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+#ifndef CONFIG_64BIT
+	unsigned int   __pad1;
+#endif
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+#ifndef CONFIG_64BIT
+	unsigned int   __pad2;
+#endif
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+#ifndef CONFIG_64BIT
+	unsigned int   __pad3;
+#endif
+	__kernel_time_t msg_ctime;	/* last change time */
+	unsigned int  msg_cbytes;	/* current number of bytes on queue */
+	unsigned int  msg_qnum;	/* number of messages in queue */
+	unsigned int  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned int  __unused1;
+	unsigned int  __unused2;
+};
+
+#endif /* _PARISC_MSGBUF_H */
diff --git a/arch/parisc/include/asm/mutex.h b/arch/parisc/include/asm/mutex.h
new file mode 100644
index 000000000000..458c1f7fbc18
--- /dev/null
+++ b/arch/parisc/include/asm/mutex.h
@@ -0,0 +1,9 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+
+#include <asm-generic/mutex-dec.h>
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
new file mode 100644
index 000000000000..c3941f09a878
--- /dev/null
+++ b/arch/parisc/include/asm/page.h
@@ -0,0 +1,173 @@
+#ifndef _PARISC_PAGE_H
+#define _PARISC_PAGE_H
+
+#include <linux/const.h>
+
+#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define PAGE_SHIFT	12
+#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
+# define PAGE_SHIFT	14
+#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
+# define PAGE_SHIFT	16
+#else
+# error "unknown default kernel page size"
+#endif
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+#include <asm/cache.h>
+
+#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to,from)      copy_user_page_asm((void *)(to), (void *)(from))
+
+struct page;
+
+void copy_user_page_asm(void *to, void *from);
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+			   struct page *pg);
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
+
+/*
+ * These are used to make use of C type-checking..
+ */
+#define STRICT_MM_TYPECHECKS
+#ifdef STRICT_MM_TYPECHECKS
+typedef struct { unsigned long pte;
+#if !defined(CONFIG_64BIT)
+                 unsigned long future_flags;
+ /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY
+	 to 2, but then strangely the identical 32bit kernel boots on a
+	 c3000(pa20), but not any longer on a 715(pa11).
+	 Still investigating... HelgeD.
+  */
+#endif
+} pte_t; /* either 32 or 64bit */
+
+/* NOTE: even on 64 bits, these entries are __u32 because we allocate
+ * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
+typedef struct { __u32 pmd; } pmd_t;
+typedef struct { __u32 pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x)	((x).pte)
+/* These do not work lvalues, so make sure we don't use them as such. */
+#define pmd_val(x)	((x).pmd + 0)
+#define pgd_val(x)	((x).pgd + 0)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) } )
+#define __pmd(x)	((pmd_t) { (x) } )
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+#define __pmd_val_set(x,n) (x).pmd = (n)
+#define __pgd_val_set(x,n) (x).pgd = (n)
+
+#else
+/*
+ * .. while these make it easier on the compiler
+ */
+typedef unsigned long pte_t;
+typedef         __u32 pmd_t;
+typedef         __u32 pgd_t;
+typedef unsigned long pgprot_t;
+
+#define pte_val(x)      (x)
+#define pmd_val(x)      (x)
+#define pgd_val(x)      (x)
+#define pgprot_val(x)   (x)
+
+#define __pte(x)        (x)
+#define __pmd(x)	(x)
+#define __pgd(x)        (x)
+#define __pgprot(x)     (x)
+
+#define __pmd_val_set(x,n) (x) = (n)
+#define __pgd_val_set(x,n) (x) = (n)
+
+#endif /* STRICT_MM_TYPECHECKS */
+
+typedef struct page *pgtable_t;
+
+typedef struct __physmem_range {
+	unsigned long start_pfn;
+	unsigned long pages;       /* PAGE_SIZE pages */
+} physmem_range_t;
+
+extern physmem_range_t pmem_ranges[];
+extern int npmem_ranges;
+
+#endif /* !__ASSEMBLY__ */
+
+/* WARNING: The definitions below must match exactly to sizeof(pte_t)
+ * etc
+ */
+#ifdef CONFIG_64BIT
+#define BITS_PER_PTE_ENTRY	3
+#define BITS_PER_PMD_ENTRY	2
+#define BITS_PER_PGD_ENTRY	2
+#else
+#define BITS_PER_PTE_ENTRY	3
+#define BITS_PER_PMD_ENTRY	2
+#define BITS_PER_PGD_ENTRY	BITS_PER_PMD_ENTRY
+#endif
+#define PGD_ENTRY_SIZE	(1UL << BITS_PER_PGD_ENTRY)
+#define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
+#define PTE_ENTRY_SIZE	(1UL << BITS_PER_PTE_ENTRY)
+
+#define LINUX_GATEWAY_SPACE     0
+
+/* This governs the relationship between virtual and physical addresses.
+ * If you alter it, make sure to take care of our various fixed mapping
+ * segments in fixmap.h */
+#ifdef CONFIG_64BIT
+#define __PAGE_OFFSET	(0x40000000)	/* 1GB */
+#else
+#define __PAGE_OFFSET	(0x10000000)	/* 256MB */
+#endif
+
+#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
+
+/* The size of the gateway page (we leave lots of room for expansion) */
+#define GATEWAY_PAGE_SIZE	0x4000
+
+/* The start of the actual kernel binary---used in vmlinux.lds.S
+ * Leave some space after __PAGE_OFFSET for detecting kernel null
+ * ptr derefs */
+#define KERNEL_BINARY_TEXT_START	(__PAGE_OFFSET + 0x100000)
+
+/* These macros don't work for 64-bit C code -- don't allow in C at all */
+#ifdef __ASSEMBLY__
+#   define PA(x)	((x)-__PAGE_OFFSET)
+#   define VA(x)	((x)+__PAGE_OFFSET)
+#endif
+#define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+
+#ifndef CONFIG_DISCONTIGMEM
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+#endif /* CONFIG_DISCONTIGMEM */
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_SHIFT		22	/* 4MB (is this fixed?) */
+#define HPAGE_SIZE      	((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#endif
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_to_page(kaddr)     pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/page.h>
+
+#endif /* _PARISC_PAGE_H */
diff --git a/arch/parisc/include/asm/param.h b/arch/parisc/include/asm/param.h
new file mode 100644
index 000000000000..32e03d877858
--- /dev/null
+++ b/arch/parisc/include/asm/param.h
@@ -0,0 +1,22 @@
+#ifndef _ASMPARISC_PARAM_H
+#define _ASMPARISC_PARAM_H
+
+#ifdef __KERNEL__
+#define HZ		CONFIG_HZ
+#define USER_HZ		100		/* some user API use "ticks" */
+#define CLOCKS_PER_SEC	(USER_HZ)	/* like times() */
+#endif
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE	4096
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#endif
diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
new file mode 100644
index 000000000000..7aa13f2add7a
--- /dev/null
+++ b/arch/parisc/include/asm/parisc-device.h
@@ -0,0 +1,64 @@
+#ifndef _ASM_PARISC_PARISC_DEVICE_H_
+#define _ASM_PARISC_PARISC_DEVICE_H_
+
+#include <linux/device.h>
+
+struct parisc_device {
+	struct resource hpa;		/* Hard Physical Address */
+	struct parisc_device_id id;
+	struct parisc_driver *driver;	/* Driver for this device */
+	char		name[80];	/* The hardware description */
+	int		irq;
+	int		aux_irq;	/* Some devices have a second IRQ */
+
+	char		hw_path;        /* The module number on this bus */
+	unsigned int	num_addrs;	/* some devices have additional address ranges. */
+	unsigned long	*addr;          /* which will be stored here */
+ 
+#ifdef CONFIG_64BIT
+	/* parms for pdc_pat_cell_module() call */
+	unsigned long	pcell_loc;	/* Physical Cell location */
+	unsigned long	mod_index;	/* PAT specific - Misc Module info */
+
+	/* generic info returned from pdc_pat_cell_module() */
+	unsigned long	mod_info;	/* PAT specific - Misc Module info */
+	unsigned long	pmod_loc;	/* physical Module location */
+#endif
+	u64		dma_mask;	/* DMA mask for I/O */
+	struct device 	dev;
+};
+
+struct parisc_driver {
+	struct parisc_driver *next;
+	char *name; 
+	const struct parisc_device_id *id_table;
+	int (*probe) (struct parisc_device *dev); /* New device discovered */
+	int (*remove) (struct parisc_device *dev);
+	struct device_driver drv;
+};
+
+
+#define to_parisc_device(d)	container_of(d, struct parisc_device, dev)
+#define to_parisc_driver(d)	container_of(d, struct parisc_driver, drv)
+#define parisc_parent(d)	to_parisc_device(d->dev.parent)
+
+static inline char *parisc_pathname(struct parisc_device *d)
+{
+	return d->dev.bus_id;
+}
+
+static inline void
+parisc_set_drvdata(struct parisc_device *d, void *p)
+{
+	dev_set_drvdata(&d->dev, p);
+}
+
+static inline void *
+parisc_get_drvdata(struct parisc_device *d)
+{
+	return dev_get_drvdata(&d->dev);
+}
+
+extern struct bus_type parisc_bus_type;
+
+#endif /*_ASM_PARISC_PARISC_DEVICE_H_*/
diff --git a/arch/parisc/include/asm/parport.h b/arch/parisc/include/asm/parport.h
new file mode 100644
index 000000000000..00d9cc3e7b97
--- /dev/null
+++ b/arch/parisc/include/asm/parport.h
@@ -0,0 +1,18 @@
+/* 
+ *
+ * parport.h: ia32-compatible parport initialisation
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+#ifndef _ASM_PARPORT_H
+#define _ASM_PARPORT_H 1
+
+
+static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma)
+{
+	/* nothing ! */
+	return 0;
+}
+
+
+#endif /* !(_ASM_PARPORT_H) */
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
new file mode 100644
index 000000000000..4ba868f44a5e
--- /dev/null
+++ b/arch/parisc/include/asm/pci.h
@@ -0,0 +1,294 @@
+#ifndef __ASM_PARISC_PCI_H
+#define __ASM_PARISC_PCI_H
+
+#include <asm/scatterlist.h>
+
+
+
+/*
+** HP PCI platforms generally support multiple bus adapters.
+**    (workstations 1-~4, servers 2-~32)
+**
+** Newer platforms number the busses across PCI bus adapters *sparsely*.
+** E.g. 0, 8, 16, ...
+**
+** Under a PCI bus, most HP platforms support PPBs up to two or three
+** levels deep. See "Bit3" product line. 
+*/
+#define PCI_MAX_BUSSES	256
+
+
+/* To be used as: mdelay(pci_post_reset_delay);
+ *
+ * post_reset is the time the kernel should stall to prevent anyone from
+ * accessing the PCI bus once #RESET is de-asserted. 
+ * PCI spec somewhere says 1 second but with multi-PCI bus systems,
+ * this makes the boot time much longer than necessary.
+ * 20ms seems to work for all the HP PCI implementations to date.
+ */
+#define pci_post_reset_delay 50
+
+
+/*
+** pci_hba_data (aka H2P_OBJECT in HP/UX)
+**
+** This is the "common" or "base" data structure which HBA drivers
+** (eg Dino or LBA) are required to place at the top of their own
+** platform_data structure.  I've heard this called "C inheritance" too.
+**
+** Data needed by pcibios layer belongs here.
+*/
+struct pci_hba_data {
+	void __iomem   *base_addr;	/* aka Host Physical Address */
+	const struct parisc_device *dev; /* device from PA bus walk */
+	struct pci_bus *hba_bus;	/* primary PCI bus below HBA */
+	int		hba_num;	/* I/O port space access "key" */
+	struct resource bus_num;	/* PCI bus numbers */
+	struct resource io_space;	/* PIOP */
+	struct resource lmmio_space;	/* bus addresses < 4Gb */
+	struct resource elmmio_space;	/* additional bus addresses < 4Gb */
+	struct resource gmmio_space;	/* bus addresses > 4Gb */
+
+	/* NOTE: Dino code assumes it can use *all* of the lmmio_space,
+	 * elmmio_space and gmmio_space as a contiguous array of
+	 * resources.  This #define represents the array size */
+	#define DINO_MAX_LMMIO_RESOURCES	3
+
+	unsigned long   lmmio_space_offset;  /* CPU view - PCI view */
+	void *          iommu;          /* IOMMU this device is under */
+	/* REVISIT - spinlock to protect resources? */
+
+	#define HBA_NAME_SIZE 16
+	char io_name[HBA_NAME_SIZE];
+	char lmmio_name[HBA_NAME_SIZE];
+	char elmmio_name[HBA_NAME_SIZE];
+	char gmmio_name[HBA_NAME_SIZE];
+};
+
+#define HBA_DATA(d)		((struct pci_hba_data *) (d))
+
+/* 
+** We support 2^16 I/O ports per HBA.  These are set up in the form
+** 0xbbxxxx, where bb is the bus number and xxxx is the I/O port
+** space address.
+*/
+#define HBA_PORT_SPACE_BITS	16
+
+#define HBA_PORT_BASE(h)	((h) << HBA_PORT_SPACE_BITS)
+#define HBA_PORT_SPACE_SIZE	(1UL << HBA_PORT_SPACE_BITS)
+
+#define PCI_PORT_HBA(a)		((a) >> HBA_PORT_SPACE_BITS)
+#define PCI_PORT_ADDR(a)	((a) & (HBA_PORT_SPACE_SIZE - 1))
+
+#ifdef CONFIG_64BIT
+#define PCI_F_EXTEND		0xffffffff00000000UL
+#define PCI_IS_LMMIO(hba,a)	pci_is_lmmio(hba,a)
+
+/* We need to know if an address is LMMMIO or GMMIO.
+ * LMMIO requires mangling and GMMIO we must use as-is.
+ */
+static __inline__  int pci_is_lmmio(struct pci_hba_data *hba, unsigned long a)
+{
+	return(((a) & PCI_F_EXTEND) == PCI_F_EXTEND);
+}
+
+/*
+** Convert between PCI (IO_VIEW) addresses and processor (PA_VIEW) addresses.
+** See pci.c for more conversions used by Generic PCI code.
+**
+** Platform characteristics/firmware guarantee that
+**	(1) PA_VIEW - IO_VIEW = lmmio_offset for both LMMIO and ELMMIO
+**	(2) PA_VIEW == IO_VIEW for GMMIO
+*/
+#define PCI_BUS_ADDR(hba,a)	(PCI_IS_LMMIO(hba,a)	\
+		?  ((a) - hba->lmmio_space_offset)	/* mangle LMMIO */ \
+		: (a))					/* GMMIO */
+#define PCI_HOST_ADDR(hba,a)	(((a) & PCI_F_EXTEND) == 0 \
+		? (a) + hba->lmmio_space_offset \
+		: (a))
+
+#else	/* !CONFIG_64BIT */
+
+#define PCI_BUS_ADDR(hba,a)	(a)
+#define PCI_HOST_ADDR(hba,a)	(a)
+#define PCI_F_EXTEND		0UL
+#define PCI_IS_LMMIO(hba,a)	(1)	/* 32-bit doesn't support GMMIO */
+
+#endif /* !CONFIG_64BIT */
+
+/*
+** KLUGE: linux/pci.h include asm/pci.h BEFORE declaring struct pci_bus
+** (This eliminates some of the warnings).
+*/
+struct pci_bus;
+struct pci_dev;
+
+/*
+ * If the PCI device's view of memory is the same as the CPU's view of memory,
+ * PCI_DMA_BUS_IS_PHYS is true.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#ifdef CONFIG_PA20
+/* All PA-2.0 machines have an IOMMU. */
+#define PCI_DMA_BUS_IS_PHYS	0
+#define parisc_has_iommu()	do { } while (0)
+#else
+
+#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)
+extern int parisc_bus_is_phys; 	/* in arch/parisc/kernel/setup.c */
+#define PCI_DMA_BUS_IS_PHYS	parisc_bus_is_phys
+#define parisc_has_iommu()	do { parisc_bus_is_phys = 0; } while (0)
+#else
+#define PCI_DMA_BUS_IS_PHYS	1
+#define parisc_has_iommu()	do { } while (0)
+#endif
+
+#endif	/* !CONFIG_PA20 */
+
+
+/*
+** Most PCI devices (eg Tulip, NCR720) also export the same registers
+** to both MMIO and I/O port space.  Due to poor performance of I/O Port
+** access under HP PCI bus adapters, strongly recommend the use of MMIO
+** address space.
+**
+** While I'm at it more PA programming notes:
+**
+** 1) MMIO stores (writes) are posted operations. This means the processor
+**    gets an "ACK" before the write actually gets to the device. A read
+**    to the same device (or typically the bus adapter above it) will
+**    force in-flight write transaction(s) out to the targeted device
+**    before the read can complete.
+**
+** 2) The Programmed I/O (PIO) data may not always be strongly ordered with
+**    respect to DMA on all platforms. Ie PIO data can reach the processor
+**    before in-flight DMA reaches memory. Since most SMP PA platforms
+**    are I/O coherent, it generally doesn't matter...but sometimes
+**    it does.
+**
+** I've helped device driver writers debug both types of problems.
+*/
+struct pci_port_ops {
+	  u8 (*inb)  (struct pci_hba_data *hba, u16 port);
+	 u16 (*inw)  (struct pci_hba_data *hba, u16 port);
+	 u32 (*inl)  (struct pci_hba_data *hba, u16 port);
+	void (*outb) (struct pci_hba_data *hba, u16 port,  u8 data);
+	void (*outw) (struct pci_hba_data *hba, u16 port, u16 data);
+	void (*outl) (struct pci_hba_data *hba, u16 port, u32 data);
+};
+
+
+struct pci_bios_ops {
+	void (*init)(void);
+	void (*fixup_bus)(struct pci_bus *bus);
+};
+
+/* pci_unmap_{single,page} is not a nop, thus... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
+	dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
+	__u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)			\
+	((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)		\
+	(((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)			\
+	((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
+	(((PTR)->LEN_NAME) = (VAL))
+
+/*
+** Stuff declared in arch/parisc/kernel/pci.c
+*/
+extern struct pci_port_ops *pci_port;
+extern struct pci_bios_ops *pci_bios;
+
+#ifdef CONFIG_PCI
+extern void pcibios_register_hba(struct pci_hba_data *);
+extern void pcibios_set_master(struct pci_dev *);
+#else
+static inline void pcibios_register_hba(struct pci_hba_data *x)
+{
+}
+#endif
+
+/*
+ * pcibios_assign_all_busses() is used in drivers/pci/pci.c:pci_do_scan_bus()
+ *   0 == check if bridge is numbered before re-numbering.
+ *   1 == pci_do_scan_bus() should automatically number all PCI-PCI bridges.
+ *
+ *   We *should* set this to zero for "legacy" platforms and one
+ *   for PAT platforms.
+ *
+ *   But legacy platforms also need to renumber the busses below a Host
+ *   Bus controller.  Adding a 4-port Tulip card on the first PCI root
+ *   bus of a C200 resulted in the secondary bus being numbered as 1.
+ *   The second PCI host bus controller's root bus had already been
+ *   assigned bus number 1 by firmware and sysfs complained.
+ *
+ *   Firmware isn't doing anything wrong here since each controller
+ *   is its own PCI domain.  It's simpler and easier for us to renumber
+ *   the busses rather than treat each Dino as a separate PCI domain.
+ *   Eventually, we may want to introduce PCI domains for Superdome or
+ *   rp7420/8420 boxes and then revisit this issue.
+ */
+#define pcibios_assign_all_busses()     (1)
+#define pcibios_scan_all_fns(a, b)	(0)
+
+#define PCIBIOS_MIN_IO          0x10
+#define PCIBIOS_MIN_MEM         0x1000 /* NBPG - but pci/setup-res.c dies */
+
+/* export the pci_ DMA API in terms of the dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+#endif
+
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res);
+
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region);
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+
+	return root;
+}
+
+static inline void pcibios_penalize_isa_irq(int irq, int active)
+{
+	/* We don't need to penalize isa irq's */
+}
+
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return channel ? 15 : 14;
+}
+
+#endif /* __ASM_PARISC_PCI_H */
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
new file mode 100644
index 000000000000..46b75f9cce51
--- /dev/null
+++ b/arch/parisc/include/asm/pdc.h
@@ -0,0 +1,760 @@
+#ifndef _PARISC_PDC_H
+#define _PARISC_PDC_H
+
+/*
+ *	PDC return values ...
+ *	All PDC calls return a subset of these errors. 
+ */
+
+#define PDC_WARN		  3	/* Call completed with a warning */
+#define PDC_REQ_ERR_1		  2	/* See above			 */
+#define PDC_REQ_ERR_0		  1	/* Call would generate a requestor error */
+#define PDC_OK			  0	/* Call completed successfully	*/
+#define PDC_BAD_PROC		 -1	/* Called non-existent procedure*/
+#define PDC_BAD_OPTION		 -2	/* Called with non-existent option */
+#define PDC_ERROR		 -3	/* Call could not complete without an error */
+#define PDC_NE_MOD		 -5	/* Module not found		*/
+#define PDC_NE_CELL_MOD		 -7	/* Cell module not found	*/
+#define PDC_INVALID_ARG		-10	/* Called with an invalid argument */
+#define PDC_BUS_POW_WARN	-12	/* Call could not complete in allowed power budget */
+#define PDC_NOT_NARROW		-17	/* Narrow mode not supported	*/
+
+/*
+ *	PDC entry points...
+ */
+
+#define PDC_POW_FAIL	1		/* perform a power-fail		*/
+#define PDC_POW_FAIL_PREPARE	0	/* prepare for powerfail	*/
+
+#define PDC_CHASSIS	2		/* PDC-chassis functions	*/
+#define PDC_CHASSIS_DISP	0	/* update chassis display	*/
+#define PDC_CHASSIS_WARN	1	/* return chassis warnings	*/
+#define PDC_CHASSIS_DISPWARN	2	/* update&return chassis status */
+#define PDC_RETURN_CHASSIS_INFO 128	/* HVERSION dependent: return chassis LED/LCD info  */
+
+#define PDC_PIM         3               /* Get PIM data                 */
+#define PDC_PIM_HPMC            0       /* Transfer HPMC data           */
+#define PDC_PIM_RETURN_SIZE     1       /* Get Max buffer needed for PIM*/
+#define PDC_PIM_LPMC            2       /* Transfer HPMC data           */
+#define PDC_PIM_SOFT_BOOT       3       /* Transfer Soft Boot data      */
+#define PDC_PIM_TOC             4       /* Transfer TOC data            */
+
+#define PDC_MODEL	4		/* PDC model information call	*/
+#define PDC_MODEL_INFO		0	/* returns information 		*/
+#define PDC_MODEL_BOOTID	1	/* set the BOOT_ID		*/
+#define PDC_MODEL_VERSIONS	2	/* returns cpu-internal versions*/
+#define PDC_MODEL_SYSMODEL	3	/* return system model info	*/
+#define PDC_MODEL_ENSPEC	4	/* enable specific option	*/
+#define PDC_MODEL_DISPEC	5	/* disable specific option	*/
+#define PDC_MODEL_CPU_ID	6	/* returns cpu-id (only newer machines!) */
+#define PDC_MODEL_CAPABILITIES	7	/* returns OS32/OS64-flags	*/
+/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */
+#define  PDC_MODEL_IOPDIR_FDC		(1 << 2)
+#define  PDC_MODEL_NVA_MASK		(3 << 4)
+#define  PDC_MODEL_NVA_SUPPORTED	(0 << 4)
+#define  PDC_MODEL_NVA_SLOW		(1 << 4)
+#define  PDC_MODEL_NVA_UNSUPPORTED	(3 << 4)
+#define PDC_MODEL_GET_BOOT__OP	8	/* returns boot test options	*/
+#define PDC_MODEL_SET_BOOT__OP	9	/* set boot test options	*/
+
+#define PA89_INSTRUCTION_SET	0x4	/* capatibilies returned	*/
+#define PA90_INSTRUCTION_SET	0x8
+
+#define PDC_CACHE	5		/* return/set cache (& TLB) info*/
+#define PDC_CACHE_INFO		0	/* returns information 		*/
+#define PDC_CACHE_SET_COH	1	/* set coherence state		*/
+#define PDC_CACHE_RET_SPID	2	/* returns space-ID bits	*/
+
+#define PDC_HPA		6		/* return HPA of processor	*/
+#define PDC_HPA_PROCESSOR	0
+#define PDC_HPA_MODULES		1
+
+#define PDC_COPROC	7		/* Co-Processor (usually FP unit(s)) */
+#define PDC_COPROC_CFG		0	/* Co-Processor Cfg (FP unit(s) enabled?) */
+
+#define PDC_IODC	8		/* talk to IODC			*/
+#define PDC_IODC_READ		0	/* read IODC entry point	*/
+/*      PDC_IODC_RI_			 * INDEX parameter of PDC_IODC_READ */
+#define PDC_IODC_RI_DATA_BYTES	0	/* IODC Data Bytes		*/
+/*				1, 2	   obsolete - HVERSION dependent*/
+#define PDC_IODC_RI_INIT	3	/* Initialize module		*/
+#define PDC_IODC_RI_IO		4	/* Module input/output		*/
+#define PDC_IODC_RI_SPA		5	/* Module input/output		*/
+#define PDC_IODC_RI_CONFIG	6	/* Module input/output		*/
+/*				7	  obsolete - HVERSION dependent */
+#define PDC_IODC_RI_TEST	8	/* Module input/output		*/
+#define PDC_IODC_RI_TLB		9	/* Module input/output		*/
+#define PDC_IODC_NINIT		2	/* non-destructive init		*/
+#define PDC_IODC_DINIT		3	/* destructive init		*/
+#define PDC_IODC_MEMERR		4	/* check for memory errors	*/
+#define PDC_IODC_INDEX_DATA	0	/* get first 16 bytes from mod IODC */
+#define PDC_IODC_BUS_ERROR	-4	/* bus error return value	*/
+#define PDC_IODC_INVALID_INDEX	-5	/* invalid index return value	*/
+#define PDC_IODC_COUNT		-6	/* count is too small		*/
+
+#define PDC_TOD		9		/* time-of-day clock (TOD)	*/
+#define PDC_TOD_READ		0	/* read TOD			*/
+#define PDC_TOD_WRITE		1	/* write TOD			*/
+
+
+#define PDC_STABLE	10		/* stable storage (sprockets)	*/
+#define PDC_STABLE_READ		0
+#define PDC_STABLE_WRITE	1
+#define PDC_STABLE_RETURN_SIZE	2
+#define PDC_STABLE_VERIFY_CONTENTS 3
+#define PDC_STABLE_INITIALIZE	4
+
+#define PDC_NVOLATILE	11		/* often not implemented	*/
+
+#define PDC_ADD_VALID	12		/* Memory validation PDC call	*/
+#define PDC_ADD_VALID_VERIFY	0	/* Make PDC_ADD_VALID verify region */
+
+#define PDC_INSTR	15		/* get instr to invoke PDCE_CHECK() */
+
+#define PDC_PROC	16		/* (sprockets)			*/
+
+#define PDC_CONFIG	16		/* (sprockets)			*/
+#define PDC_CONFIG_DECONFIG	0
+#define PDC_CONFIG_DRECONFIG	1
+#define PDC_CONFIG_DRETURN_CONFIG 2
+
+#define PDC_BLOCK_TLB	18		/* manage hardware block-TLB	*/
+#define PDC_BTLB_INFO		0	/* returns parameter 		*/
+#define PDC_BTLB_INSERT		1	/* insert BTLB entry		*/
+#define PDC_BTLB_PURGE		2	/* purge BTLB entries 		*/
+#define PDC_BTLB_PURGE_ALL	3	/* purge all BTLB entries 	*/
+
+#define PDC_TLB		19		/* manage hardware TLB miss handling */
+#define PDC_TLB_INFO		0	/* returns parameter 		*/
+#define PDC_TLB_SETUP		1	/* set up miss handling 	*/
+
+#define PDC_MEM		20		/* Manage memory		*/
+#define PDC_MEM_MEMINFO		0
+#define PDC_MEM_ADD_PAGE	1
+#define PDC_MEM_CLEAR_PDT	2
+#define PDC_MEM_READ_PDT	3
+#define PDC_MEM_RESET_CLEAR	4
+#define PDC_MEM_GOODMEM		5
+#define PDC_MEM_TABLE		128	/* Non contig mem map (sprockets) */
+#define PDC_MEM_RETURN_ADDRESS_TABLE	PDC_MEM_TABLE
+#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES_SIZE	131
+#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES	132
+#define PDC_MEM_GET_PHYSICAL_LOCATION_FROM_MEMORY_ADDRESS 133
+
+#define PDC_MEM_RET_SBE_REPLACED	5	/* PDC_MEM return values */
+#define PDC_MEM_RET_DUPLICATE_ENTRY	4
+#define PDC_MEM_RET_BUF_SIZE_SMALL	1
+#define PDC_MEM_RET_PDT_FULL		-11
+#define PDC_MEM_RET_INVALID_PHYSICAL_LOCATION ~0ULL
+
+#define PDC_PSW		21		/* Get/Set default System Mask  */
+#define PDC_PSW_MASK		0	/* Return mask                  */
+#define PDC_PSW_GET_DEFAULTS	1	/* Return defaults              */
+#define PDC_PSW_SET_DEFAULTS	2	/* Set default                  */
+#define PDC_PSW_ENDIAN_BIT	1	/* set for big endian           */
+#define PDC_PSW_WIDE_BIT	2	/* set for wide mode            */ 
+
+#define PDC_SYSTEM_MAP	22		/* find system modules		*/
+#define PDC_FIND_MODULE 	0
+#define PDC_FIND_ADDRESS	1
+#define PDC_TRANSLATE_PATH	2
+
+#define PDC_SOFT_POWER	23		/* soft power switch		*/
+#define PDC_SOFT_POWER_INFO	0	/* return info about the soft power switch */
+#define PDC_SOFT_POWER_ENABLE	1	/* enable/disable soft power switch */
+
+
+/* HVERSION dependent */
+
+/* The PDC_MEM_MAP calls */
+#define PDC_MEM_MAP	128		/* on s700: return page info	*/
+#define PDC_MEM_MAP_HPA		0	/* returns hpa of a module	*/
+
+#define PDC_EEPROM	129		/* EEPROM access		*/
+#define PDC_EEPROM_READ_WORD	0
+#define PDC_EEPROM_WRITE_WORD	1
+#define PDC_EEPROM_READ_BYTE	2
+#define PDC_EEPROM_WRITE_BYTE	3
+#define PDC_EEPROM_EEPROM_PASSWORD -1000
+
+#define PDC_NVM		130		/* NVM (non-volatile memory) access */
+#define PDC_NVM_READ_WORD	0
+#define PDC_NVM_WRITE_WORD	1
+#define PDC_NVM_READ_BYTE	2
+#define PDC_NVM_WRITE_BYTE	3
+
+#define PDC_SEED_ERROR	132		/* (sprockets)			*/
+
+#define PDC_IO		135		/* log error info, reset IO system */
+#define PDC_IO_READ_AND_CLEAR_ERRORS	0
+#define PDC_IO_RESET			1
+#define PDC_IO_RESET_DEVICES		2
+/* sets bits 6&7 (little endian) of the HcControl Register */
+#define PDC_IO_USB_SUSPEND	0xC000000000000000
+#define PDC_IO_EEPROM_IO_ERR_TABLE_FULL	-5	/* return value */
+#define PDC_IO_NO_SUSPEND		-6	/* return value */
+
+#define PDC_BROADCAST_RESET 136		/* reset all processors		*/
+#define PDC_DO_RESET		0	/* option: perform a broadcast reset */
+#define PDC_DO_FIRM_TEST_RESET	1	/* Do broadcast reset with bitmap */
+#define PDC_BR_RECONFIGURATION	2	/* reset w/reconfiguration	*/
+#define PDC_FIRM_TEST_MAGIC	0xab9ec36fUL    /* for this reboot only	*/
+
+#define PDC_LAN_STATION_ID 138		/* Hversion dependent mechanism for */
+#define PDC_LAN_STATION_ID_READ	0	/* getting the lan station address  */
+
+#define	PDC_LAN_STATION_ID_SIZE	6
+
+#define PDC_CHECK_RANGES 139		/* (sprockets)			*/
+
+#define PDC_NV_SECTIONS	141		/* (sprockets)			*/
+
+#define PDC_PERFORMANCE	142		/* performance monitoring	*/
+
+#define PDC_SYSTEM_INFO	143		/* system information		*/
+#define PDC_SYSINFO_RETURN_INFO_SIZE	0
+#define PDC_SYSINFO_RRETURN_SYS_INFO	1
+#define PDC_SYSINFO_RRETURN_ERRORS	2
+#define PDC_SYSINFO_RRETURN_WARNINGS	3
+#define PDC_SYSINFO_RETURN_REVISIONS	4
+#define PDC_SYSINFO_RRETURN_DIAGNOSE	5
+#define PDC_SYSINFO_RRETURN_HV_DIAGNOSE	1005
+
+#define PDC_RDR		144		/* (sprockets)			*/
+#define PDC_RDR_READ_BUFFER	0
+#define PDC_RDR_READ_SINGLE	1
+#define PDC_RDR_WRITE_SINGLE	2
+
+#define PDC_INTRIGUE	145 		/* (sprockets)			*/
+#define PDC_INTRIGUE_WRITE_BUFFER 	 0
+#define PDC_INTRIGUE_GET_SCRATCH_BUFSIZE 1
+#define PDC_INTRIGUE_START_CPU_COUNTERS	 2
+#define PDC_INTRIGUE_STOP_CPU_COUNTERS	 3
+
+#define PDC_STI		146 		/* STI access			*/
+/* same as PDC_PCI_XXX values (see below) */
+
+/* Legacy PDC definitions for same stuff */
+#define PDC_PCI_INDEX	147
+#define PDC_PCI_INTERFACE_INFO		0
+#define PDC_PCI_SLOT_INFO		1
+#define PDC_PCI_INFLIGHT_BYTES		2
+#define PDC_PCI_READ_CONFIG		3
+#define PDC_PCI_WRITE_CONFIG		4
+#define PDC_PCI_READ_PCI_IO		5
+#define PDC_PCI_WRITE_PCI_IO		6
+#define PDC_PCI_READ_CONFIG_DELAY	7
+#define PDC_PCI_UPDATE_CONFIG_DELAY	8
+#define PDC_PCI_PCI_PATH_TO_PCI_HPA	9
+#define PDC_PCI_PCI_HPA_TO_PCI_PATH	10
+#define PDC_PCI_PCI_PATH_TO_PCI_BUS	11
+#define PDC_PCI_PCI_RESERVED		12
+#define PDC_PCI_PCI_INT_ROUTE_SIZE	13
+#define PDC_PCI_GET_INT_TBL_SIZE	PDC_PCI_PCI_INT_ROUTE_SIZE
+#define PDC_PCI_PCI_INT_ROUTE		14
+#define PDC_PCI_GET_INT_TBL		PDC_PCI_PCI_INT_ROUTE 
+#define PDC_PCI_READ_MON_TYPE		15
+#define PDC_PCI_WRITE_MON_TYPE		16
+
+
+/* Get SCSI Interface Card info:  SDTR, SCSI ID, mode (SE vs LVD) */
+#define PDC_INITIATOR	163
+#define PDC_GET_INITIATOR	0
+#define PDC_SET_INITIATOR	1
+#define PDC_DELETE_INITIATOR	2
+#define PDC_RETURN_TABLE_SIZE	3
+#define PDC_RETURN_TABLE	4
+
+#define PDC_LINK	165 		/* (sprockets)			*/
+#define PDC_LINK_PCI_ENTRY_POINTS	0  /* list (Arg1) = 0 */
+#define PDC_LINK_USB_ENTRY_POINTS	1  /* list (Arg1) = 1 */
+
+/* cl_class
+ * page 3-33 of IO-Firmware ARS
+ * IODC ENTRY_INIT(Search first) RET[1]
+ */
+#define	CL_NULL		0	/* invalid */
+#define	CL_RANDOM	1	/* random access (as disk) */
+#define	CL_SEQU		2	/* sequential access (as tape) */
+#define	CL_DUPLEX	7	/* full-duplex point-to-point (RS-232, Net) */
+#define	CL_KEYBD	8	/* half-duplex console (HIL Keyboard) */
+#define	CL_DISPL	9	/* half-duplex console (display) */
+#define	CL_FC		10	/* FiberChannel access media */
+
+/* IODC ENTRY_INIT() */
+#define ENTRY_INIT_SRCH_FRST	2
+#define ENTRY_INIT_SRCH_NEXT	3
+#define ENTRY_INIT_MOD_DEV	4
+#define ENTRY_INIT_DEV		5
+#define ENTRY_INIT_MOD		6
+#define ENTRY_INIT_MSG		9
+
+/* IODC ENTRY_IO() */
+#define ENTRY_IO_BOOTIN		0
+#define ENTRY_IO_BOOTOUT	1
+#define ENTRY_IO_CIN		2
+#define ENTRY_IO_COUT		3
+#define ENTRY_IO_CLOSE		4
+#define ENTRY_IO_GETMSG		9
+#define ENTRY_IO_BBLOCK_IN	16
+#define ENTRY_IO_BBLOCK_OUT	17
+
+/* IODC ENTRY_SPA() */
+
+/* IODC ENTRY_CONFIG() */
+
+/* IODC ENTRY_TEST() */
+
+/* IODC ENTRY_TLB() */
+
+/* constants for OS (NVM...) */
+#define OS_ID_NONE		0	/* Undefined OS ID	*/
+#define OS_ID_HPUX		1	/* HP-UX OS		*/
+#define OS_ID_MPEXL		2	/* MPE XL OS		*/
+#define OS_ID_OSF		3	/* OSF OS		*/
+#define OS_ID_HPRT		4	/* HP-RT OS		*/
+#define OS_ID_NOVEL		5	/* NOVELL OS		*/
+#define OS_ID_LINUX		6	/* Linux		*/
+
+
+/* constants for PDC_CHASSIS */
+#define OSTAT_OFF		0
+#define OSTAT_FLT		1 
+#define OSTAT_TEST		2
+#define OSTAT_INIT		3
+#define OSTAT_SHUT		4
+#define OSTAT_WARN		5
+#define OSTAT_RUN		6
+#define OSTAT_ON		7
+
+/* Page Zero constant offsets used by the HPMC handler */
+#define BOOT_CONSOLE_HPA_OFFSET  0x3c0
+#define BOOT_CONSOLE_SPA_OFFSET  0x3c4
+#define BOOT_CONSOLE_PATH_OFFSET 0x3a8
+
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT	32
+
+#if !defined(__ASSEMBLY__)
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+
+extern int pdc_type;
+
+/* Values for pdc_type */
+#define PDC_TYPE_ILLEGAL	-1
+#define PDC_TYPE_PAT		 0 /* 64-bit PAT-PDC */
+#define PDC_TYPE_SYSTEM_MAP	 1 /* 32-bit, but supports PDC_SYSTEM_MAP */
+#define PDC_TYPE_SNAKE		 2 /* Doesn't support SYSTEM_MAP */
+
+struct pdc_chassis_info {       /* for PDC_CHASSIS_INFO */
+	unsigned long actcnt;   /* actual number of bytes returned */
+	unsigned long maxcnt;   /* maximum number of bytes that could be returned */
+};
+
+struct pdc_coproc_cfg {         /* for PDC_COPROC_CFG */
+        unsigned long ccr_functional;
+        unsigned long ccr_present;
+        unsigned long revision;
+        unsigned long model;
+};
+
+struct pdc_model {		/* for PDC_MODEL */
+	unsigned long hversion;
+	unsigned long sversion;
+	unsigned long hw_id;
+	unsigned long boot_id;
+	unsigned long sw_id;
+	unsigned long sw_cap;
+	unsigned long arch_rev;
+	unsigned long pot_key;
+	unsigned long curr_key;
+};
+
+struct pdc_cache_cf {		/* for PDC_CACHE  (I/D-caches) */
+    unsigned long
+#ifdef CONFIG_64BIT
+		cc_padW:32,
+#endif
+		cc_alias: 4,	/* alias boundaries for virtual addresses   */
+		cc_block: 4,	/* to determine most efficient stride */
+		cc_line	: 3,	/* maximum amount written back as a result of store (multiple of 16 bytes) */
+		cc_shift: 2,	/* how much to shift cc_block left */
+		cc_wt	: 1,	/* 0 = WT-Dcache, 1 = WB-Dcache */
+		cc_sh	: 2,	/* 0 = separate I/D-cache, else shared I/D-cache */
+		cc_cst  : 3,	/* 0 = incoherent D-cache, 1=coherent D-cache */
+		cc_pad1 : 10,	/* reserved */
+		cc_hv   : 3;	/* hversion dependent */
+};
+
+struct pdc_tlb_cf {		/* for PDC_CACHE (I/D-TLB's) */
+    unsigned long tc_pad0:12,	/* reserved */
+#ifdef CONFIG_64BIT
+		tc_padW:32,
+#endif
+		tc_sh	: 2,	/* 0 = separate I/D-TLB, else shared I/D-TLB */
+		tc_hv   : 1,	/* HV */
+		tc_page : 1,	/* 0 = 2K page-size-machine, 1 = 4k page size */
+		tc_cst  : 3,	/* 0 = incoherent operations, else coherent operations */
+		tc_aid  : 5,	/* ITLB: width of access ids of processor (encoded!) */
+		tc_pad1 : 8;	/* ITLB: width of space-registers (encoded) */
+};
+
+struct pdc_cache_info {		/* main-PDC_CACHE-structure (caches & TLB's) */
+	/* I-cache */
+	unsigned long	ic_size;	/* size in bytes */
+	struct pdc_cache_cf ic_conf;	/* configuration */
+	unsigned long	ic_base;	/* base-addr */
+	unsigned long	ic_stride;
+	unsigned long	ic_count;
+	unsigned long	ic_loop;
+	/* D-cache */
+	unsigned long	dc_size;	/* size in bytes */
+	struct pdc_cache_cf dc_conf;	/* configuration */
+	unsigned long	dc_base;	/* base-addr */
+	unsigned long	dc_stride;
+	unsigned long	dc_count;
+	unsigned long	dc_loop;
+	/* Instruction-TLB */
+	unsigned long	it_size;	/* number of entries in I-TLB */
+	struct pdc_tlb_cf it_conf;	/* I-TLB-configuration */
+	unsigned long	it_sp_base;
+	unsigned long	it_sp_stride;
+	unsigned long	it_sp_count;
+	unsigned long	it_off_base;
+	unsigned long	it_off_stride;
+	unsigned long	it_off_count;
+	unsigned long	it_loop;
+	/* data-TLB */
+	unsigned long	dt_size;	/* number of entries in D-TLB */
+	struct pdc_tlb_cf dt_conf;	/* D-TLB-configuration */
+	unsigned long	dt_sp_base;
+	unsigned long	dt_sp_stride;
+	unsigned long	dt_sp_count;
+	unsigned long	dt_off_base;
+	unsigned long	dt_off_stride;
+	unsigned long	dt_off_count;
+	unsigned long	dt_loop;
+};
+
+#if 0
+/* If you start using the next struct, you'll have to adjust it to
+ * work with 64-bit firmware I think -PB
+ */
+struct pdc_iodc {     /* PDC_IODC */
+	unsigned char   hversion_model;
+	unsigned char 	hversion;
+	unsigned char 	spa;
+	unsigned char 	type;
+	unsigned int	sversion_rev:4;
+	unsigned int	sversion_model:19;
+	unsigned int	sversion_opt:8;
+	unsigned char	rev;
+	unsigned char	dep;
+	unsigned char	features;
+	unsigned char	pad1;
+	unsigned int	checksum:16;
+	unsigned int	length:16;
+	unsigned int    pad[15];
+} __attribute__((aligned(8))) ;
+#endif
+
+#ifndef CONFIG_PA20
+/* no BLTBs in pa2.0 processors */
+struct pdc_btlb_info_range {
+	__u8 res00;
+	__u8 num_i;
+	__u8 num_d;
+	__u8 num_comb;
+};
+
+struct pdc_btlb_info {	/* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */
+	unsigned int min_size;	/* minimum size of BTLB in pages */
+	unsigned int max_size;	/* maximum size of BTLB in pages */
+	struct pdc_btlb_info_range fixed_range_info;
+	struct pdc_btlb_info_range variable_range_info;
+};
+
+#endif /* !CONFIG_PA20 */
+
+#ifdef CONFIG_64BIT
+struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */
+	unsigned long entries_returned;
+	unsigned long entries_total;
+};
+
+struct pdc_memory_table {       /* PDC_MEM/PDC_MEM_TABLE (arguments) */
+	unsigned long paddr;
+	unsigned int  pages;
+	unsigned int  reserved;
+};
+#endif /* CONFIG_64BIT */
+
+struct pdc_system_map_mod_info { /* PDC_SYSTEM_MAP/FIND_MODULE */
+	unsigned long mod_addr;
+	unsigned long mod_pgs;
+	unsigned long add_addrs;
+};
+
+struct pdc_system_map_addr_info { /* PDC_SYSTEM_MAP/FIND_ADDRESS */
+	unsigned long mod_addr;
+	unsigned long mod_pgs;
+};
+
+struct pdc_initiator { /* PDC_INITIATOR */
+	int host_id;
+	int factor;
+	int width;
+	int mode;
+};
+
+struct hardware_path {
+	char  flags;	/* see bit definitions below */
+	char  bc[6];	/* Bus Converter routing info to a specific */
+			/* I/O adaptor (< 0 means none, > 63 resvd) */
+	char  mod;	/* fixed field of specified module */
+};
+
+/*
+ * Device path specifications used by PDC.
+ */
+struct pdc_module_path {
+	struct hardware_path path;
+	unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */
+};
+
+#ifndef CONFIG_PA20
+/* Only used on some pre-PA2.0 boxes */
+struct pdc_memory_map {		/* PDC_MEMORY_MAP */
+	unsigned long hpa;	/* mod's register set address */
+	unsigned long more_pgs;	/* number of additional I/O pgs */
+};
+#endif
+
+struct pdc_tod {
+	unsigned long tod_sec; 
+	unsigned long tod_usec;
+};
+
+/* architected results from PDC_PIM/transfer hpmc on a PA1.1 machine */
+
+struct pdc_hpmc_pim_11 { /* PDC_PIM */
+	__u32 gr[32];
+	__u32 cr[32];
+	__u32 sr[8];
+	__u32 iasq_back;
+	__u32 iaoq_back;
+	__u32 check_type;
+	__u32 cpu_state;
+	__u32 rsvd1;
+	__u32 cache_check;
+	__u32 tlb_check;
+	__u32 bus_check;
+	__u32 assists_check;
+	__u32 rsvd2;
+	__u32 assist_state;
+	__u32 responder_addr;
+	__u32 requestor_addr;
+	__u32 path_info;
+	__u64 fr[32];
+};
+
+/*
+ * architected results from PDC_PIM/transfer hpmc on a PA2.0 machine
+ *
+ * Note that PDC_PIM doesn't care whether or not wide mode was enabled
+ * so the results are different on  PA1.1 vs. PA2.0 when in narrow mode.
+ *
+ * Note also that there are unarchitected results available, which
+ * are hversion dependent. Do a "ser pim 0 hpmc" after rebooting, since
+ * the firmware is probably the best way of printing hversion dependent
+ * data.
+ */
+
+struct pdc_hpmc_pim_20 { /* PDC_PIM */
+	__u64 gr[32];
+	__u64 cr[32];
+	__u64 sr[8];
+	__u64 iasq_back;
+	__u64 iaoq_back;
+	__u32 check_type;
+	__u32 cpu_state;
+	__u32 cache_check;
+	__u32 tlb_check;
+	__u32 bus_check;
+	__u32 assists_check;
+	__u32 assist_state;
+	__u32 path_info;
+	__u64 responder_addr;
+	__u64 requestor_addr;
+	__u64 fr[32];
+};
+
+void pdc_console_init(void);	/* in pdc_console.c */
+void pdc_console_restart(void);
+
+void setup_pdc(void);		/* in inventory.c */
+
+/* wrapper-functions from pdc.c */
+
+int pdc_add_valid(unsigned long address);
+int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len);
+int pdc_chassis_disp(unsigned long disp);
+int pdc_chassis_warn(unsigned long *warn);
+int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
+		  void *iodc_data, unsigned int iodc_data_size);
+int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
+			     struct pdc_module_path *mod_path, long mod_index);
+int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info,
+			      long mod_index, long addr_index);
+int pdc_model_info(struct pdc_model *model);
+int pdc_model_sysmodel(char *name);
+int pdc_model_cpuid(unsigned long *cpu_id);
+int pdc_model_versions(unsigned long *versions, int id);
+int pdc_model_capabilities(unsigned long *capabilities);
+int pdc_cache_info(struct pdc_cache_info *cache);
+int pdc_spaceid_bits(unsigned long *space_bits);
+#ifndef CONFIG_PA20
+int pdc_btlb_info(struct pdc_btlb_info *btlb);
+int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path);
+#endif /* !CONFIG_PA20 */
+int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa);
+
+int pdc_stable_read(unsigned long staddr, void *memaddr, unsigned long count);
+int pdc_stable_write(unsigned long staddr, void *memaddr, unsigned long count);
+int pdc_stable_get_size(unsigned long *size);
+int pdc_stable_verify_contents(void);
+int pdc_stable_initialize(void);
+
+int pdc_pci_irt_size(unsigned long *num_entries, unsigned long hpa);
+int pdc_pci_irt(unsigned long num_entries, unsigned long hpa, void *tbl);
+
+int pdc_get_initiator(struct hardware_path *, struct pdc_initiator *);
+int pdc_tod_read(struct pdc_tod *tod);
+int pdc_tod_set(unsigned long sec, unsigned long usec);
+
+#ifdef CONFIG_64BIT
+int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
+		struct pdc_memory_table *tbl, unsigned long entries);
+#endif
+
+void set_firmware_width(void);
+int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
+int pdc_do_reset(void);
+int pdc_soft_power_info(unsigned long *power_reg);
+int pdc_soft_power_button(int sw_control);
+void pdc_io_reset(void);
+void pdc_io_reset_devices(void);
+int pdc_iodc_getc(void);
+int pdc_iodc_print(const unsigned char *str, unsigned count);
+
+void pdc_emergency_unlock(void);
+int pdc_sti_call(unsigned long func, unsigned long flags,
+                 unsigned long inptr, unsigned long outputr,
+                 unsigned long glob_cfg);
+
+static inline char * os_id_to_string(u16 os_id) {
+	switch(os_id) {
+	case OS_ID_NONE:	return "No OS";
+	case OS_ID_HPUX:	return "HP-UX";
+	case OS_ID_MPEXL:	return "MPE-iX";
+	case OS_ID_OSF:		return "OSF";
+	case OS_ID_HPRT:	return "HP-RT";
+	case OS_ID_NOVEL:	return "Novell Netware";
+	case OS_ID_LINUX:	return "Linux";
+	default:	return "Unknown";
+	}
+}
+
+#endif /* __KERNEL__ */
+
+#define PAGE0   ((struct zeropage *)__PAGE_OFFSET)
+
+/* DEFINITION OF THE ZERO-PAGE (PAG0) */
+/* based on work by Jason Eckhardt (jason@equator.com) */
+
+/* flags of the device_path */
+#define	PF_AUTOBOOT	0x80
+#define	PF_AUTOSEARCH	0x40
+#define	PF_TIMER	0x0F
+
+struct device_path {		/* page 1-69 */
+	unsigned char flags;	/* flags see above! */
+	unsigned char bc[6];	/* bus converter routing info */
+	unsigned char mod;
+	unsigned int  layers[6];/* device-specific layer-info */
+} __attribute__((aligned(8))) ;
+
+struct pz_device {
+	struct	device_path dp;	/* see above */
+	/* struct	iomod *hpa; */
+	unsigned int hpa;	/* HPA base address */
+	/* char	*spa; */
+	unsigned int spa;	/* SPA base address */
+	/* int	(*iodc_io)(struct iomod*, ...); */
+	unsigned int iodc_io;	/* device entry point */
+	short	pad;		/* reserved */
+	unsigned short cl_class;/* see below */
+} __attribute__((aligned(8))) ;
+
+struct zeropage {
+	/* [0x000] initialize vectors (VEC) */
+	unsigned int	vec_special;		/* must be zero */
+	/* int	(*vec_pow_fail)(void);*/
+	unsigned int	vec_pow_fail; /* power failure handler */
+	/* int	(*vec_toc)(void); */
+	unsigned int	vec_toc;
+	unsigned int	vec_toclen;
+	/* int	(*vec_rendz)(void); */
+	unsigned int vec_rendz;
+	int	vec_pow_fail_flen;
+	int	vec_pad[10];		
+	
+	/* [0x040] reserved processor dependent */
+	int	pad0[112];
+
+	/* [0x200] reserved */
+	int	pad1[84];
+
+	/* [0x350] memory configuration (MC) */
+	int	memc_cont;		/* contiguous mem size (bytes) */
+	int	memc_phsize;		/* physical memory size */
+	int	memc_adsize;		/* additional mem size, bytes of SPA space used by PDC */
+	unsigned int mem_pdc_hi;	/* used for 64-bit */
+
+	/* [0x360] various parameters for the boot-CPU */
+	/* unsigned int *mem_booterr[8]; */
+	unsigned int mem_booterr[8];	/* ptr to boot errors */
+	unsigned int mem_free;		/* first location, where OS can be loaded */
+	/* struct iomod *mem_hpa; */
+	unsigned int mem_hpa;		/* HPA of the boot-CPU */
+	/* int (*mem_pdc)(int, ...); */
+	unsigned int mem_pdc;		/* PDC entry point */
+	unsigned int mem_10msec;	/* number of clock ticks in 10msec */
+
+	/* [0x390] initial memory module (IMM) */
+	/* struct iomod *imm_hpa; */
+	unsigned int imm_hpa;		/* HPA of the IMM */
+	int	imm_soft_boot;		/* 0 = was hard boot, 1 = was soft boot */
+	unsigned int	imm_spa_size;		/* SPA size of the IMM in bytes */
+	unsigned int	imm_max_mem;		/* bytes of mem in IMM */
+
+	/* [0x3A0] boot console, display device and keyboard */
+	struct pz_device mem_cons;	/* description of console device */
+	struct pz_device mem_boot;	/* description of boot device */
+	struct pz_device mem_kbd;	/* description of keyboard device */
+
+	/* [0x430] reserved */
+	int	pad430[116];
+
+	/* [0x600] processor dependent */
+	__u32	pad600[1];
+	__u32	proc_sti;		/* pointer to STI ROM */
+	__u32	pad608[126];
+};
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif /* _PARISC_PDC_H */
diff --git a/arch/parisc/include/asm/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h
new file mode 100644
index 000000000000..a609273dc6bf
--- /dev/null
+++ b/arch/parisc/include/asm/pdc_chassis.h
@@ -0,0 +1,381 @@
+/*
+ *	include/asm-parisc/pdc_chassis.h
+ *
+ *	Copyright (C) 2002 Laurent Canet <canetl@esiee.fr>
+ *	Copyright (C) 2002 Thibaut Varene <varenet@parisc-linux.org>
+ *
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License, version 2, as
+ *      published by the Free Software Foundation.
+ *      
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *      
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *      TODO:	- handle processor number on SMP systems (Reporting Entity ID)
+ *      	- handle message ID
+ *      	- handle timestamps
+ */
+ 
+
+#ifndef _PARISC_PDC_CHASSIS_H
+#define _PARISC_PDC_CHASSIS_H
+
+/*
+ * ----------
+ * Prototypes
+ * ----------
+ */
+
+int pdc_chassis_send_status(int message);
+void parisc_pdc_chassis_init(void);
+
+
+/*
+ * -----------------
+ * Direct call names
+ * -----------------
+ * They setup everything for you, the Log message and the corresponding LED state
+ */
+
+#define PDC_CHASSIS_DIRECT_BSTART	0
+#define PDC_CHASSIS_DIRECT_BCOMPLETE	1
+#define PDC_CHASSIS_DIRECT_SHUTDOWN	2
+#define PDC_CHASSIS_DIRECT_PANIC	3
+#define PDC_CHASSIS_DIRECT_HPMC		4
+#define PDC_CHASSIS_DIRECT_LPMC		5
+#define PDC_CHASSIS_DIRECT_DUMP		6	/* not yet implemented */
+#define PDC_CHASSIS_DIRECT_OOPS		7	/* not yet implemented */
+
+
+/*
+ * ------------
+ * LEDs control
+ * ------------
+ * Set the three LEDs -- Run, Attn, and Fault.
+ */
+
+/* Old PDC LED control */
+#define PDC_CHASSIS_DISP_DATA(v)	((unsigned long)(v) << 17)
+
+/* 
+ * Available PDC PAT LED states
+ */
+
+#define PDC_CHASSIS_LED_RUN_OFF		(0ULL << 4)
+#define PDC_CHASSIS_LED_RUN_FLASH	(1ULL << 4)
+#define PDC_CHASSIS_LED_RUN_ON		(2ULL << 4)
+#define PDC_CHASSIS_LED_RUN_NC		(3ULL << 4)
+#define PDC_CHASSIS_LED_ATTN_OFF	(0ULL << 6)
+#define PDC_CHASSIS_LED_ATTN_FLASH	(1ULL << 6)
+#define PDC_CHASSIS_LED_ATTN_NC		(3ULL << 6)	/* ATTN ON is invalid */
+#define PDC_CHASSIS_LED_FAULT_OFF	(0ULL << 8)
+#define PDC_CHASSIS_LED_FAULT_FLASH	(1ULL << 8)
+#define PDC_CHASSIS_LED_FAULT_ON	(2ULL << 8)
+#define PDC_CHASSIS_LED_FAULT_NC	(3ULL << 8)
+#define PDC_CHASSIS_LED_VALID		(1ULL << 10)
+
+/* 
+ * Valid PDC PAT LED states combinations
+ */
+
+/* System running normally */
+#define PDC_CHASSIS_LSTATE_RUN_NORMAL	(PDC_CHASSIS_LED_RUN_ON		| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* System crashed and rebooted itself successfully */
+#define PDC_CHASSIS_LSTATE_RUN_CRASHREC	(PDC_CHASSIS_LED_RUN_ON		| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_FLASH	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* There was a system interruption that did not take the system down */
+#define PDC_CHASSIS_LSTATE_RUN_SYSINT	(PDC_CHASSIS_LED_RUN_ON		| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* System running and unexpected reboot or non-critical error detected */
+#define PDC_CHASSIS_LSTATE_RUN_NCRIT	(PDC_CHASSIS_LED_RUN_ON		| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_FLASH	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Executing non-OS code */
+#define PDC_CHASSIS_LSTATE_NONOS	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Boot failed - Executing non-OS code */
+#define PDC_CHASSIS_LSTATE_NONOS_BFAIL	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_ON	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Unexpected reboot occurred - Executing non-OS code */
+#define PDC_CHASSIS_LSTATE_NONOS_UNEXP	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_FLASH	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Executing non-OS code - Non-critical error detected */
+#define PDC_CHASSIS_LSTATE_NONOS_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Boot failed - Executing non-OS code - Non-critical error detected */
+#define PDC_CHASSIS_LSTATE_BFAIL_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_ON	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Unexpected reboot/recovering - Executing non-OS code - Non-critical error detected */
+#define PDC_CHASSIS_LSTATE_UNEXP_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_FLASH	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Cannot execute PDC */
+#define PDC_CHASSIS_LSTATE_CANNOT_PDC	(PDC_CHASSIS_LED_RUN_OFF	| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Boot failed - OS not up - PDC has detected a failure that prevents boot */
+#define PDC_CHASSIS_LSTATE_FATAL_BFAIL	(PDC_CHASSIS_LED_RUN_OFF	| \
+					 PDC_CHASSIS_LED_ATTN_OFF	| \
+					 PDC_CHASSIS_LED_FAULT_ON	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* No code running - Non-critical error detected (double fault situation) */
+#define PDC_CHASSIS_LSTATE_NOCODE_NCRIT	(PDC_CHASSIS_LED_RUN_OFF	| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_OFF	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* Boot failed - OS not up - Fatal failure detected - Non-critical error detected */
+#define PDC_CHASSIS_LSTATE_FATAL_NCRIT	(PDC_CHASSIS_LED_RUN_OFF	| \
+					 PDC_CHASSIS_LED_ATTN_FLASH	| \
+					 PDC_CHASSIS_LED_FAULT_ON	| \
+					 PDC_CHASSIS_LED_VALID		)
+/* All other states are invalid */
+
+
+/*
+ * --------------
+ * PDC Log events
+ * --------------
+ * Here follows bits needed to fill up the log event sent to PDC_CHASSIS
+ * The log message contains: Alert level, Source, Source detail,
+ * Source ID, Problem detail, Caller activity, Activity status, 
+ * Caller subactivity, Reporting entity type, Reporting entity ID,
+ * Data type, Unique message ID and EOM. 
+ */
+
+/* Alert level */
+#define PDC_CHASSIS_ALERT_FORWARD	(0ULL << 36)	/* no failure detected */
+#define PDC_CHASSIS_ALERT_SERPROC	(1ULL << 36)	/* service proc - no failure */
+#define PDC_CHASSIS_ALERT_NURGENT	(2ULL << 36)	/* non-urgent operator attn */
+#define PDC_CHASSIS_ALERT_BLOCKED	(3ULL << 36)	/* system blocked */
+#define PDC_CHASSIS_ALERT_CONF_CHG	(4ULL << 36)	/* unexpected configuration change */
+#define PDC_CHASSIS_ALERT_ENV_PB	(5ULL << 36)	/* boot possible, environmental pb */
+#define PDC_CHASSIS_ALERT_PENDING	(6ULL << 36)	/* boot possible, pending failure */
+#define PDC_CHASSIS_ALERT_PERF_IMP	(8ULL << 36)	/* boot possible, performance impaired */
+#define PDC_CHASSIS_ALERT_FUNC_IMP	(10ULL << 36)	/* boot possible, functionality impaired */
+#define PDC_CHASSIS_ALERT_SOFT_FAIL	(12ULL << 36)	/* software failure */
+#define PDC_CHASSIS_ALERT_HANG		(13ULL << 36)	/* system hang */
+#define PDC_CHASSIS_ALERT_ENV_FATAL	(14ULL << 36)	/* fatal power or environmental pb */
+#define PDC_CHASSIS_ALERT_HW_FATAL	(15ULL << 36)	/* fatal hardware problem */
+
+/* Source */
+#define PDC_CHASSIS_SRC_NONE		(0ULL << 28)	/* unknown, no source stated */
+#define PDC_CHASSIS_SRC_PROC		(1ULL << 28)	/* processor */
+/* For later use ? */
+#define PDC_CHASSIS_SRC_PROC_CACHE	(2ULL << 28)	/* processor cache*/
+#define PDC_CHASSIS_SRC_PDH		(3ULL << 28)	/* processor dependent hardware */
+#define PDC_CHASSIS_SRC_PWR		(4ULL << 28)	/* power */
+#define PDC_CHASSIS_SRC_FAB		(5ULL << 28)	/* fabric connector */
+#define PDC_CHASSIS_SRC_PLATi		(6ULL << 28)	/* platform */
+#define PDC_CHASSIS_SRC_MEM		(7ULL << 28)	/* memory */
+#define PDC_CHASSIS_SRC_IO		(8ULL << 28)	/* I/O */
+#define PDC_CHASSIS_SRC_CELL		(9ULL << 28)	/* cell */
+#define PDC_CHASSIS_SRC_PD		(10ULL << 28)	/* protected domain */
+
+/* Source detail field */
+#define PDC_CHASSIS_SRC_D_PROC		(1ULL << 24)	/* processor general */
+
+/* Source ID - platform dependent */
+#define PDC_CHASSIS_SRC_ID_UNSPEC	(0ULL << 16)
+
+/* Problem detail - problem source dependent */
+#define PDC_CHASSIS_PB_D_PROC_NONE	(0ULL << 32)	/* no problem detail */
+#define PDC_CHASSIS_PB_D_PROC_TIMEOUT	(4ULL << 32)	/* timeout */
+
+/* Caller activity */
+#define PDC_CHASSIS_CALL_ACT_HPUX_BL	(7ULL << 12)	/* Boot Loader */
+#define PDC_CHASSIS_CALL_ACT_HPUX_PD	(8ULL << 12)	/* SAL_PD activities */
+#define PDC_CHASSIS_CALL_ACT_HPUX_EVENT	(9ULL << 12)	/* SAL_EVENTS activities */
+#define PDC_CHASSIS_CALL_ACT_HPUX_IO	(10ULL << 12)	/* SAL_IO activities */
+#define PDC_CHASSIS_CALL_ACT_HPUX_PANIC	(11ULL << 12)	/* System panic */
+#define PDC_CHASSIS_CALL_ACT_HPUX_INIT	(12ULL << 12)	/* System initialization */
+#define PDC_CHASSIS_CALL_ACT_HPUX_SHUT	(13ULL << 12)	/* System shutdown */
+#define PDC_CHASSIS_CALL_ACT_HPUX_WARN	(14ULL << 12)	/* System warning */
+#define PDC_CHASSIS_CALL_ACT_HPUX_DU	(15ULL << 12)	/* Display_Activity() update */
+
+/* Activity status - implementation dependent */
+#define PDC_CHASSIS_ACT_STATUS_UNSPEC	(0ULL << 0)
+
+/* Caller subactivity - implementation dependent */
+/* FIXME: other subactivities ? */
+#define PDC_CHASSIS_CALL_SACT_UNSPEC	(0ULL << 4)	/* implementation dependent */
+
+/* Reporting entity type */
+#define PDC_CHASSIS_RET_GENERICOS	(12ULL << 52)	/* generic OSes */
+#define PDC_CHASSIS_RET_IA64_NT		(13ULL << 52)	/* IA-64 NT */
+#define PDC_CHASSIS_RET_HPUX		(14ULL << 52)	/* HP-UX */
+#define PDC_CHASSIS_RET_DIAG		(15ULL << 52)	/* offline diagnostics & utilities */
+
+/* Reporting entity ID */
+#define PDC_CHASSIS_REID_UNSPEC		(0ULL << 44)
+
+/* Data type */
+#define PDC_CHASSIS_DT_NONE		(0ULL << 59)	/* data field unused */
+/* For later use ? Do we need these ? */
+#define PDC_CHASSIS_DT_PHYS_ADDR	(1ULL << 59)	/* physical address */
+#define PDC_CHASSIS_DT_DATA_EXPECT	(2ULL << 59)	/* expected data */
+#define PDC_CHASSIS_DT_ACTUAL		(3ULL << 59)	/* actual data */
+#define PDC_CHASSIS_DT_PHYS_LOC		(4ULL << 59)	/* physical location */
+#define PDC_CHASSIS_DT_PHYS_LOC_EXT	(5ULL << 59)	/* physical location extension */
+#define PDC_CHASSIS_DT_TAG		(6ULL << 59)	/* tag */
+#define PDC_CHASSIS_DT_SYNDROME		(7ULL << 59)	/* syndrome */
+#define PDC_CHASSIS_DT_CODE_ADDR	(8ULL << 59)	/* code address */
+#define PDC_CHASSIS_DT_ASCII_MSG	(9ULL << 59)	/* ascii message */
+#define PDC_CHASSIS_DT_POST		(10ULL << 59)	/* POST code */
+#define PDC_CHASSIS_DT_TIMESTAMP	(11ULL << 59)	/* timestamp */
+#define PDC_CHASSIS_DT_DEV_STAT		(12ULL << 59)	/* device status */
+#define PDC_CHASSIS_DT_DEV_TYPE		(13ULL << 59)	/* device type */
+#define PDC_CHASSIS_DT_PB_DET		(14ULL << 59)	/* problem detail */
+#define PDC_CHASSIS_DT_ACT_LEV		(15ULL << 59)	/* activity level/timeout */
+#define PDC_CHASSIS_DT_SER_NUM		(16ULL << 59)	/* serial number */
+#define PDC_CHASSIS_DT_REV_NUM		(17ULL << 59)	/* revision number */
+#define PDC_CHASSIS_DT_INTERRUPT	(18ULL << 59)	/* interruption information */
+#define PDC_CHASSIS_DT_TEST_NUM		(19ULL << 59)	/* test number */
+#define PDC_CHASSIS_DT_STATE_CHG	(20ULL << 59)	/* major changes in system state */
+#define PDC_CHASSIS_DT_PROC_DEALLOC	(21ULL << 59)	/* processor deallocate */
+#define PDC_CHASSIS_DT_RESET		(30ULL << 59)	/* reset type and cause */
+#define PDC_CHASSIS_DT_PA_LEGACY	(31ULL << 59)	/* legacy PA hex chassis code */
+
+/* System states - part of major changes in system state data field */
+#define PDC_CHASSIS_SYSTATE_BSTART	(0ULL << 0)	/* boot start */
+#define PDC_CHASSIS_SYSTATE_BCOMP	(1ULL << 0)	/* boot complete */
+#define PDC_CHASSIS_SYSTATE_CHANGE	(2ULL << 0)	/* major change */
+#define PDC_CHASSIS_SYSTATE_LED		(3ULL << 0)	/* LED change */
+#define PDC_CHASSIS_SYSTATE_PANIC	(9ULL << 0)	/* OS Panic */
+#define PDC_CHASSIS_SYSTATE_DUMP	(10ULL << 0)	/* memory dump */
+#define PDC_CHASSIS_SYSTATE_HPMC	(11ULL << 0)	/* processing HPMC */
+#define PDC_CHASSIS_SYSTATE_HALT	(15ULL << 0)	/* system halted */
+
+/* Message ID */
+#define PDC_CHASSIS_MSG_ID		(0ULL << 40)	/* we do not handle msg IDs atm */
+
+/* EOM - separates log entries */
+#define PDC_CHASSIS_EOM_CLEAR		(0ULL << 43)
+#define PDC_CHASSIS_EOM_SET		(1ULL << 43)
+
+/*
+ * Preformated well known messages
+ */
+
+/* Boot started */
+#define PDC_CHASSIS_PMSG_BSTART		(PDC_CHASSIS_ALERT_SERPROC	| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_INIT	| \
+					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
+					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_REID_UNSPEC	| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_BSTART	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+/* Boot complete */
+#define PDC_CHASSIS_PMSG_BCOMPLETE	(PDC_CHASSIS_ALERT_SERPROC	| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_INIT	| \
+					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
+					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_REID_UNSPEC	| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_BCOMP	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+/* Shutdown */
+#define PDC_CHASSIS_PMSG_SHUTDOWN	(PDC_CHASSIS_ALERT_SERPROC	| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_SHUT	| \
+					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
+					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_REID_UNSPEC	| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_HALT	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+/* Panic */
+#define PDC_CHASSIS_PMSG_PANIC		(PDC_CHASSIS_ALERT_SOFT_FAIL	| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_PANIC| \
+					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
+					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_REID_UNSPEC	| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_PANIC	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+// FIXME: extrapolated data
+/* HPMC */
+#define PDC_CHASSIS_PMSG_HPMC		(PDC_CHASSIS_ALERT_CONF_CHG /*?*/	| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_WARN	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_HPMC	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+/* LPMC */
+#define PDC_CHASSIS_PMSG_LPMC		(PDC_CHASSIS_ALERT_BLOCKED /*?*/| \
+					 PDC_CHASSIS_SRC_PROC		| \
+					 PDC_CHASSIS_SRC_D_PROC		| \
+					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
+					 PDC_CHASSIS_PB_D_PROC_NONE	| \
+					 PDC_CHASSIS_CALL_ACT_HPUX_WARN	| \
+					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
+					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
+					 PDC_CHASSIS_RET_HPUX		| \
+					 PDC_CHASSIS_REID_UNSPEC	| \
+					 PDC_CHASSIS_DT_STATE_CHG	| \
+					 PDC_CHASSIS_SYSTATE_CHANGE	| \
+					 PDC_CHASSIS_MSG_ID		| \
+					 PDC_CHASSIS_EOM_SET		)
+
+#endif /* _PARISC_PDC_CHASSIS_H */
+/* vim: set ts=8 */
diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
new file mode 100644
index 000000000000..47539f117958
--- /dev/null
+++ b/arch/parisc/include/asm/pdcpat.h
@@ -0,0 +1,308 @@
+#ifndef __PARISC_PATPDC_H
+#define __PARISC_PATPDC_H
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright 2000 (c) Hewlett Packard (Paul Bame <bame()spam.parisc-linux.org>)
+ * Copyright 2000,2004 (c) Grant Grundler <grundler()nahspam.parisc-linux.org>
+ */
+
+
+#define PDC_PAT_CELL           	64L   /* Interface for gaining and 
+                                         * manipulatin g cell state within PD */
+#define PDC_PAT_CELL_GET_NUMBER    0L   /* Return Cell number */
+#define PDC_PAT_CELL_GET_INFO      1L   /* Returns info about Cell */
+#define PDC_PAT_CELL_MODULE        2L   /* Returns info about Module */
+#define PDC_PAT_CELL_SET_ATTENTION 9L   /* Set Cell Attention indicator */
+#define PDC_PAT_CELL_NUMBER_TO_LOC 10L   /* Cell Number -> Location */
+#define PDC_PAT_CELL_WALK_FABRIC   11L   /* Walk the Fabric */
+#define PDC_PAT_CELL_GET_RDT_SIZE  12L   /* Return Route Distance Table Sizes */
+#define PDC_PAT_CELL_GET_RDT       13L   /* Return Route Distance Tables */
+#define PDC_PAT_CELL_GET_LOCAL_PDH_SZ 14L /* Read Local PDH Buffer Size */
+#define PDC_PAT_CELL_SET_LOCAL_PDH    15L  /* Write Local PDH Buffer */
+#define PDC_PAT_CELL_GET_REMOTE_PDH_SZ 16L /* Return Remote PDH Buffer Size */
+#define PDC_PAT_CELL_GET_REMOTE_PDH 17L /* Read Remote PDH Buffer */
+#define PDC_PAT_CELL_GET_DBG_INFO   128L  /* Return DBG Buffer Info */
+#define PDC_PAT_CELL_CHANGE_ALIAS   129L  /* Change Non-Equivalent Alias Chacking */
+
+
+/*
+** Arg to PDC_PAT_CELL_MODULE memaddr[4]
+**
+** Addresses on the Merced Bus != all Runway Bus addresses.
+** This is intended for programming SBA/LBA chips range registers.
+*/
+#define IO_VIEW      0UL
+#define PA_VIEW      1UL
+
+/* PDC_PAT_CELL_MODULE entity type values */
+#define	PAT_ENTITY_CA	0	/* central agent */
+#define	PAT_ENTITY_PROC	1	/* processor */
+#define	PAT_ENTITY_MEM	2	/* memory controller */
+#define	PAT_ENTITY_SBA	3	/* system bus adapter */
+#define	PAT_ENTITY_LBA	4	/* local bus adapter */
+#define	PAT_ENTITY_PBC	5	/* processor bus converter */
+#define	PAT_ENTITY_XBC	6	/* crossbar fabric connect */
+#define	PAT_ENTITY_RC	7	/* fabric interconnect */
+
+/* PDC_PAT_CELL_MODULE address range type values */
+#define PAT_PBNUM           0         /* PCI Bus Number */
+#define PAT_LMMIO           1         /* < 4G MMIO Space */
+#define PAT_GMMIO           2         /* > 4G MMIO Space */
+#define PAT_NPIOP           3         /* Non Postable I/O Port Space */
+#define PAT_PIOP            4         /* Postable I/O Port Space */
+#define PAT_AHPA            5         /* Addional HPA Space */
+#define PAT_UFO             6         /* HPA Space (UFO for Mariposa) */
+#define PAT_GNIP            7         /* GNI Reserved Space */
+
+
+
+/* PDC PAT CHASSIS LOG -- Platform logging & forward progress functions */
+
+#define PDC_PAT_CHASSIS_LOG		65L
+#define PDC_PAT_CHASSIS_WRITE_LOG    	0L /* Write Log Entry */
+#define PDC_PAT_CHASSIS_READ_LOG     	1L /* Read  Log Entry */
+
+
+/* PDC PAT CPU  -- CPU configuration within the protection domain */
+
+#define PDC_PAT_CPU                	67L
+#define PDC_PAT_CPU_INFO            	0L /* Return CPU config info */
+#define PDC_PAT_CPU_DELETE          	1L /* Delete CPU */
+#define PDC_PAT_CPU_ADD             	2L /* Add    CPU */
+#define PDC_PAT_CPU_GET_NUMBER      	3L /* Return CPU Number */
+#define PDC_PAT_CPU_GET_HPA         	4L /* Return CPU HPA */
+#define PDC_PAT_CPU_STOP            	5L /* Stop   CPU */
+#define PDC_PAT_CPU_RENDEZVOUS      	6L /* Rendezvous CPU */
+#define PDC_PAT_CPU_GET_CLOCK_INFO  	7L /* Return CPU Clock info */
+#define PDC_PAT_CPU_GET_RENDEZVOUS_STATE 8L /* Return Rendezvous State */
+#define PDC_PAT_CPU_PLUNGE_FABRIC 	128L /* Plunge Fabric */
+#define PDC_PAT_CPU_UPDATE_CACHE_CLEANSING 129L /* Manipulate Cache 
+                                                 * Cleansing Mode */
+/*  PDC PAT EVENT -- Platform Events */
+
+#define PDC_PAT_EVENT              	68L
+#define PDC_PAT_EVENT_GET_CAPS     	0L /* Get Capabilities */
+#define PDC_PAT_EVENT_SET_MODE     	1L /* Set Notification Mode */
+#define PDC_PAT_EVENT_SCAN         	2L /* Scan Event */
+#define PDC_PAT_EVENT_HANDLE       	3L /* Handle Event */
+#define PDC_PAT_EVENT_GET_NB_CALL  	4L /* Get Non-Blocking call Args */
+
+/*  PDC PAT HPMC -- Cause processor to go into spin loop, and wait
+ *  			for wake up from Monarch Processor.
+ */
+
+#define PDC_PAT_HPMC               70L
+#define PDC_PAT_HPMC_RENDEZ_CPU     0L /* go into spin loop */
+#define PDC_PAT_HPMC_SET_PARAMS     1L /* Allows OS to specify intr which PDC 
+                                        * will use to interrupt OS during
+                                        * machine check rendezvous */
+
+/* parameters for PDC_PAT_HPMC_SET_PARAMS: */
+#define HPMC_SET_PARAMS_INTR 	    1L /* Rendezvous Interrupt */
+#define HPMC_SET_PARAMS_WAKE 	    2L /* Wake up processor */
+
+
+/*  PDC PAT IO  -- On-line services for I/O modules */
+
+#define PDC_PAT_IO                  71L
+#define PDC_PAT_IO_GET_SLOT_STATUS   	5L /* Get Slot Status Info*/
+#define PDC_PAT_IO_GET_LOC_FROM_HARDWARE 6L /* Get Physical Location from */
+                                            /* Hardware Path */
+#define PDC_PAT_IO_GET_HARDWARE_FROM_LOC 7L /* Get Hardware Path from 
+                                             * Physical Location */
+#define PDC_PAT_IO_GET_PCI_CONFIG_FROM_HW 11L /* Get PCI Configuration
+                                               * Address from Hardware Path */
+#define PDC_PAT_IO_GET_HW_FROM_PCI_CONFIG 12L /* Get Hardware Path 
+                                               * from PCI Configuration Address */
+#define PDC_PAT_IO_READ_HOST_BRIDGE_INFO 13L  /* Read Host Bridge State Info */
+#define PDC_PAT_IO_CLEAR_HOST_BRIDGE_INFO 14L /* Clear Host Bridge State Info*/
+#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE_SIZE 15L /* Get PCI INT Routing Table 
+                                                   * Size */
+#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE  16L /* Get PCI INT Routing Table */
+#define PDC_PAT_IO_GET_HINT_TABLE_SIZE 	17L /* Get Hint Table Size */
+#define PDC_PAT_IO_GET_HINT_TABLE   	18L /* Get Hint Table */
+#define PDC_PAT_IO_PCI_CONFIG_READ  	19L /* PCI Config Read */
+#define PDC_PAT_IO_PCI_CONFIG_WRITE 	20L /* PCI Config Write */
+#define PDC_PAT_IO_GET_NUM_IO_SLOTS 	21L /* Get Number of I/O Bay Slots in 
+                                       		  * Cabinet */
+#define PDC_PAT_IO_GET_LOC_IO_SLOTS 	22L /* Get Physical Location of I/O */
+                                   		     /* Bay Slots in Cabinet */
+#define PDC_PAT_IO_BAY_STATUS_INFO  	28L /* Get I/O Bay Slot Status Info */
+#define PDC_PAT_IO_GET_PROC_VIEW        29L /* Get Processor view of IO address */
+#define PDC_PAT_IO_PROG_SBA_DIR_RANGE   30L /* Program directed range */
+
+
+/* PDC PAT MEM  -- Manage memory page deallocation */
+
+#define PDC_PAT_MEM            72L
+#define PDC_PAT_MEM_PD_INFO     	0L /* Return PDT info for PD       */
+#define PDC_PAT_MEM_PD_CLEAR    	1L /* Clear PDT for PD             */
+#define PDC_PAT_MEM_PD_READ     	2L /* Read PDT entries for PD      */
+#define PDC_PAT_MEM_PD_RESET    	3L /* Reset clear bit for PD       */
+#define PDC_PAT_MEM_CELL_INFO   	5L /* Return PDT info For Cell     */
+#define PDC_PAT_MEM_CELL_CLEAR  	6L /* Clear PDT For Cell           */
+#define PDC_PAT_MEM_CELL_READ   	7L /* Read PDT entries For Cell    */
+#define PDC_PAT_MEM_CELL_RESET  	8L /* Reset clear bit For Cell     */
+#define PDC_PAT_MEM_SETGM	  	9L /* Set Golden Memory value      */
+#define PDC_PAT_MEM_ADD_PAGE    	10L /* ADDs a page to the cell      */
+#define PDC_PAT_MEM_ADDRESS     	11L /* Get Physical Location From   */
+                                    		 /* Memory Address               */
+#define PDC_PAT_MEM_GET_TXT_SIZE   	12L /* Get Formatted Text Size   */
+#define PDC_PAT_MEM_GET_PD_TXT     	13L /* Get PD Formatted Text     */
+#define PDC_PAT_MEM_GET_CELL_TXT   	14L /* Get Cell Formatted Text   */
+#define PDC_PAT_MEM_RD_STATE_INFO  	15L /* Read Mem Module State Info*/
+#define PDC_PAT_MEM_CLR_STATE_INFO 	16L /*Clear Mem Module State Info*/
+#define PDC_PAT_MEM_CLEAN_RANGE    	128L /*Clean Mem in specific range*/
+#define PDC_PAT_MEM_GET_TBL_SIZE   	131L /* Get Memory Table Size     */
+#define PDC_PAT_MEM_GET_TBL        	132L /* Get Memory Table          */
+
+
+/* PDC PAT NVOLATILE  --  Access Non-Volatile Memory */
+
+#define PDC_PAT_NVOLATILE	73L
+#define PDC_PAT_NVOLATILE_READ		0L /* Read Non-Volatile Memory   */
+#define PDC_PAT_NVOLATILE_WRITE		1L /* Write Non-Volatile Memory  */
+#define PDC_PAT_NVOLATILE_GET_SIZE	2L /* Return size of NVM         */
+#define PDC_PAT_NVOLATILE_VERIFY	3L /* Verify contents of NVM     */
+#define PDC_PAT_NVOLATILE_INIT		4L /* Initialize NVM             */
+
+/* PDC PAT PD */
+#define PDC_PAT_PD		74L         /* Protection Domain Info   */
+#define PDC_PAT_PD_GET_ADDR_MAP		0L  /* Get Address Map          */
+
+/* PDC_PAT_PD_GET_ADDR_MAP entry types */
+#define PAT_MEMORY_DESCRIPTOR		1   
+
+/* PDC_PAT_PD_GET_ADDR_MAP memory types */
+#define PAT_MEMTYPE_MEMORY		0
+#define PAT_MEMTYPE_FIRMWARE		4
+
+/* PDC_PAT_PD_GET_ADDR_MAP memory usage */
+#define PAT_MEMUSE_GENERAL		0
+#define PAT_MEMUSE_GI			128
+#define PAT_MEMUSE_GNI			129
+
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+#ifdef CONFIG_64BIT
+#define is_pdc_pat()	(PDC_TYPE_PAT == pdc_type)
+extern int pdc_pat_get_irt_size(unsigned long *num_entries, unsigned long cell_num);
+extern int pdc_pat_get_irt(void *r_addr, unsigned long cell_num);
+#else	/* ! CONFIG_64BIT */
+/* No PAT support for 32-bit kernels...sorry */
+#define is_pdc_pat()	(0)
+#define pdc_pat_get_irt_size(num_entries, cell_numn)	PDC_BAD_PROC
+#define pdc_pat_get_irt(r_addr, cell_num)		PDC_BAD_PROC
+#endif	/* ! CONFIG_64BIT */
+
+
+struct pdc_pat_cell_num {
+	unsigned long cell_num;
+	unsigned long cell_loc;
+};
+
+struct pdc_pat_cpu_num {
+	unsigned long cpu_num;
+	unsigned long cpu_loc;
+};
+
+struct pdc_pat_pd_addr_map_entry {
+	unsigned char entry_type;       /* 1 = Memory Descriptor Entry Type */
+	unsigned char reserve1[5];
+	unsigned char memory_type;
+	unsigned char memory_usage;
+	unsigned long paddr;
+	unsigned int  pages;            /* Length in 4K pages */
+	unsigned int  reserve2;
+	unsigned long cell_map;
+};
+
+/********************************************************************
+* PDC_PAT_CELL[Return Cell Module] memaddr[0] conf_base_addr
+* ----------------------------------------------------------
+* Bit  0 to 51 - conf_base_addr
+* Bit 52 to 62 - reserved
+* Bit       63 - endianess bit
+********************************************************************/
+#define PAT_GET_CBA(value) ((value) & 0xfffffffffffff000UL)
+
+/********************************************************************
+* PDC_PAT_CELL[Return Cell Module] memaddr[1] mod_info
+* ----------------------------------------------------
+* Bit  0 to  7 - entity type
+*    0 = central agent,            1 = processor,
+*    2 = memory controller,        3 = system bus adapter,
+*    4 = local bus adapter,        5 = processor bus converter,
+*    6 = crossbar fabric connect,  7 = fabric interconnect,
+*    8 to 254 reserved,            255 = unknown.
+* Bit  8 to 15 - DVI
+* Bit 16 to 23 - IOC functions
+* Bit 24 to 39 - reserved
+* Bit 40 to 63 - mod_pages
+*    number of 4K pages a module occupies starting at conf_base_addr
+********************************************************************/
+#define PAT_GET_ENTITY(value)	(((value) >> 56) & 0xffUL)
+#define PAT_GET_DVI(value)	(((value) >> 48) & 0xffUL)
+#define PAT_GET_IOC(value)	(((value) >> 40) & 0xffUL)
+#define PAT_GET_MOD_PAGES(value) ((value) & 0xffffffUL)
+
+
+/*
+** PDC_PAT_CELL_GET_INFO return block
+*/
+typedef struct pdc_pat_cell_info_rtn_block {
+	unsigned long cpu_info;
+	unsigned long cell_info;
+	unsigned long cell_location;
+	unsigned long reo_location;
+	unsigned long mem_size;
+	unsigned long dimm_status;
+	unsigned long pdc_rev;
+	unsigned long fabric_info0;
+	unsigned long fabric_info1;
+	unsigned long fabric_info2;
+	unsigned long fabric_info3;
+	unsigned long reserved[21];
+} pdc_pat_cell_info_rtn_block_t;
+
+
+/* FIXME: mod[508] should really be a union of the various mod components */
+struct pdc_pat_cell_mod_maddr_block {	/* PDC_PAT_CELL_MODULE */
+	unsigned long cba;		/* func 0 cfg space address */
+	unsigned long mod_info;		/* module information */
+	unsigned long mod_location;	/* physical location of the module */
+	struct hardware_path mod_path;	/* module path (device path - layers) */
+	unsigned long mod[508];		/* PAT cell module components */
+} __attribute__((aligned(8))) ;
+
+typedef struct pdc_pat_cell_mod_maddr_block pdc_pat_cell_mod_maddr_block_t;
+
+
+extern int pdc_pat_chassis_send_log(unsigned long status, unsigned long data);
+extern int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info);
+extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long mod, unsigned long view_type, void *mem_addr);
+extern int pdc_pat_cell_num_to_loc(void *, unsigned long);
+
+extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa);
+
+extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset);
+
+
+extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val); 
+extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val); 
+
+
+/* Flag to indicate this is a PAT box...don't use this unless you
+** really have to...it might go away some day.
+*/
+extern int pdc_pat;     /* arch/parisc/kernel/inventory.c */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ! __PARISC_PATPDC_H */
diff --git a/arch/parisc/include/asm/percpu.h b/arch/parisc/include/asm/percpu.h
new file mode 100644
index 000000000000..a0dcd1970128
--- /dev/null
+++ b/arch/parisc/include/asm/percpu.h
@@ -0,0 +1,7 @@
+#ifndef _PARISC_PERCPU_H
+#define _PARISC_PERCPU_H
+
+#include <asm-generic/percpu.h>
+
+#endif 
+
diff --git a/arch/parisc/include/asm/perf.h b/arch/parisc/include/asm/perf.h
new file mode 100644
index 000000000000..a18e11972c09
--- /dev/null
+++ b/arch/parisc/include/asm/perf.h
@@ -0,0 +1,74 @@
+#ifndef _ASM_PERF_H_
+#define _ASM_PERF_H_
+
+/* ioctls */
+#define PA_PERF_ON	_IO('p', 1)
+#define PA_PERF_OFF	_IOR('p', 2, unsigned int)
+#define PA_PERF_VERSION	_IOR('p', 3, int)
+
+#define PA_PERF_DEV	"perf"
+#define PA_PERF_MINOR	146
+
+/* Interface types */
+#define UNKNOWN_INTF    255
+#define ONYX_INTF         0
+#define CUDA_INTF         1
+
+/* Common Onyx and Cuda images */
+#define CPI                 0
+#define BUSUTIL             1
+#define TLBMISS             2
+#define TLBHANDMISS         3
+#define PTKN                4
+#define PNTKN               5
+#define IMISS               6
+#define DMISS               7
+#define DMISS_ACCESS        8 
+#define BIG_CPI 	    9
+#define BIG_LS		   10  
+#define BR_ABORT	   11
+#define ISNT		   12 
+#define QUADRANT           13
+#define RW_PDFET           14
+#define RW_WDFET           15
+#define SHLIB_CPI          16
+
+/* Cuda only Images */
+#define FLOPS              17
+#define CACHEMISS          18 
+#define BRANCHES           19             
+#define CRSTACK            20 
+#define I_CACHE_SPEC       21 
+#define MAX_CUDA_IMAGES    22 
+
+/* Onyx only Images */
+#define ADDR_INV_ABORT_ALU 17
+#define BRAD_STALL	   18 
+#define CNTL_IN_PIPEL	   19 
+#define DSNT_XFH	   20 
+#define FET_SIG1	   21 
+#define FET_SIG2	   22 
+#define G7_1		   23 
+#define G7_2		   24 
+#define G7_3 		   25
+#define G7_4		   26
+#define MPB_LABORT         27
+#define PANIC              28
+#define RARE_INST          29 
+#define RW_DFET            30 
+#define RW_IFET            31 
+#define RW_SDFET           32 
+#define SPEC_IFET          33 
+#define ST_COND0           34 
+#define ST_COND1           35 
+#define ST_COND2           36
+#define ST_COND3           37
+#define ST_COND4           38
+#define ST_UNPRED0         39 
+#define ST_UNPRED1         40 
+#define UNPRED             41 
+#define GO_STORE           42
+#define SHLIB_CALL         43
+#define MAX_ONYX_IMAGES    44
+
+#endif
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
new file mode 100644
index 000000000000..fc987a1c12a8
--- /dev/null
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -0,0 +1,149 @@
+#ifndef _ASM_PGALLOC_H
+#define _ASM_PGALLOC_H
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+#include <asm/cache.h>
+
+/* Allocate the top level pgd (page directory)
+ *
+ * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
+ * allocate the first pmd adjacent to the pgd.  This means that we can
+ * subtract a constant offset to get to it.  The pmd and pgd sizes are
+ * arranged so that a single pmd covers 4GB (giving a full 64-bit
+ * process access to 8TB) so our lookups are effectively L2 for the
+ * first 4GB of the kernel (i.e. for all ILP32 processes and all the
+ * kernel for machines with under 4GB of memory) */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL,
+					       PGD_ALLOC_ORDER);
+	pgd_t *actual_pgd = pgd;
+
+	if (likely(pgd != NULL)) {
+		memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
+#ifdef CONFIG_64BIT
+		actual_pgd += PTRS_PER_PGD;
+		/* Populate first pmd with allocated memory.  We mark it
+		 * with PxD_FLAG_ATTACHED as a signal to the system that this
+		 * pmd entry may not be cleared. */
+		__pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | 
+				        PxD_FLAG_VALID | 
+					PxD_FLAG_ATTACHED) 
+			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+		/* The first pmd entry also is marked with _PAGE_GATEWAY as
+		 * a signal that this pmd may not be freed */
+		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+#endif
+	}
+	return actual_pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_64BIT
+	pgd -= PTRS_PER_PGD;
+#endif
+	free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
+}
+
+#if PT_NLEVELS == 3
+
+/* Three Level Page Table Support for pmd's */
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+{
+	__pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+		        (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
+					       PMD_ORDER);
+	if (pmd)
+		memset(pmd, 0, PAGE_SIZE<<PMD_ORDER);
+	return pmd;
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+#ifdef CONFIG_64BIT
+	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
+		/* This is the permanent pmd attached to the pgd;
+		 * cannot free it */
+		return;
+#endif
+	free_pages((unsigned long)pmd, PMD_ORDER);
+}
+
+#else
+
+/* Two Level Page Table Support for pmd's */
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+
+#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
+#define pmd_free(mm, x)			do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+#endif
+
+static inline void
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+{
+#ifdef CONFIG_64BIT
+	/* preserve the gateway marker if this is the beginning of
+	 * the permanent pmd */
+	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
+		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
+				 PxD_FLAG_VALID |
+				 PxD_FLAG_ATTACHED) 
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+	else
+#endif
+		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) 
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+}
+
+#define pmd_populate(mm, pmd, pte_page) \
+	pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	if (page)
+		pgtable_page_ctor(page);
+	return page;
+}
+
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+{
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	return pte;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	pte_free_kernel(mm, page_address(pte));
+}
+
+#define check_pgt_cache()	do { } while (0)
+
+#endif
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
new file mode 100644
index 000000000000..470a4b88124d
--- /dev/null
+++ b/arch/parisc/include/asm/pgtable.h
@@ -0,0 +1,508 @@
+#ifndef _PARISC_PGTABLE_H
+#define _PARISC_PGTABLE_H
+
+#include <asm-generic/4level-fixup.h>
+
+#include <asm/fixmap.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * we simulate an x86-style page table for the linux mm code
+ */
+
+#include <linux/mm.h>		/* for vm_area_struct */
+#include <linux/bitops.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+/*
+ * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+ * memory.  For the return value to be meaningful, ADDR must be >=
+ * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
+ * require a hash-, or multi-level tree-lookup or something of that
+ * sort) but it guarantees to return TRUE only if accessing the page
+ * at that address does not cause an error.  Note that there may be
+ * addresses for which kern_addr_valid() returns FALSE even though an
+ * access would not cause an error (e.g., this is typically true for
+ * memory mapped I/O regions.
+ *
+ * XXX Need to implement this for parisc.
+ */
+#define kern_addr_valid(addr)	(1)
+
+/* Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval)                                 \
+        do{                                                     \
+                *(pteptr) = (pteval);                           \
+        } while(0)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+#endif /* !__ASSEMBLY__ */
+
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
+
+/* This is the size of the initially mapped kernel memory */
+#ifdef CONFIG_64BIT
+#define KERNEL_INITIAL_ORDER	24	/* 0 to 1<<24 = 16MB */
+#else
+#define KERNEL_INITIAL_ORDER	23	/* 0 to 1<<23 = 8MB */
+#endif
+#define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+#define PT_NLEVELS	3
+#define PGD_ORDER	1 /* Number of pages per pgd */
+#define PMD_ORDER	1 /* Number of pages per pmd */
+#define PGD_ALLOC_ORDER	2 /* first pgd contains pmd */
+#else
+#define PT_NLEVELS	2
+#define PGD_ORDER	1 /* Number of pages per pgd */
+#define PGD_ALLOC_ORDER	PGD_ORDER
+#endif
+
+/* Definitions for 3rd level (we use PLD here for Page Lower directory
+ * because PTE_SHIFT is used lower down to mean shift that has to be
+ * done to get usable bits out of the PTE) */
+#define PLD_SHIFT	PAGE_SHIFT
+#define PLD_SIZE	PAGE_SIZE
+#define BITS_PER_PTE	(PAGE_SHIFT - BITS_PER_PTE_ENTRY)
+#define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
+
+/* Definitions for 2nd level */
+#define pgtable_cache_init()	do { } while (0)
+
+#define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#if PT_NLEVELS == 3
+#define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#else
+#define BITS_PER_PMD	0
+#endif
+#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
+
+/* Definitions for 1st level */
+#define PGDIR_SHIFT	(PMD_SHIFT + BITS_PER_PMD)
+#define BITS_PER_PGD	(PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD    (1UL << BITS_PER_PGD)
+#define USER_PTRS_PER_PGD       PTRS_PER_PGD
+
+#define MAX_ADDRBITS	(PGDIR_SHIFT + BITS_PER_PGD)
+#define MAX_ADDRESS	(1UL << MAX_ADDRBITS)
+
+#define SPACEID_SHIFT	(MAX_ADDRBITS - 32)
+
+/* This calculates the number of initial pages we need for the initial
+ * page tables */
+#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT)
+# define PT_INITIAL	(1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT))
+#else
+# define PT_INITIAL	(1)  /* all initial PTEs fit into one page */
+#endif
+
+/*
+ * pgd entries used up by user/kernel:
+ */
+
+#define FIRST_USER_ADDRESS	0
+
+/* NB: The tlb miss handlers make certain assumptions about the order */
+/*     of the following bits, so be careful (One example, bits 25-31  */
+/*     are moved together in one instruction).                        */
+
+#define _PAGE_READ_BIT     31   /* (0x001) read access allowed */
+#define _PAGE_WRITE_BIT    30   /* (0x002) write access allowed */
+#define _PAGE_EXEC_BIT     29   /* (0x004) execute access allowed */
+#define _PAGE_GATEWAY_BIT  28   /* (0x008) privilege promotion allowed */
+#define _PAGE_DMB_BIT      27   /* (0x010) Data Memory Break enable (B bit) */
+#define _PAGE_DIRTY_BIT    26   /* (0x020) Page Dirty (D bit) */
+#define _PAGE_FILE_BIT	_PAGE_DIRTY_BIT	/* overload this bit */
+#define _PAGE_REFTRAP_BIT  25   /* (0x040) Page Ref. Trap enable (T bit) */
+#define _PAGE_NO_CACHE_BIT 24   /* (0x080) Uncached Page (U bit) */
+#define _PAGE_ACCESSED_BIT 23   /* (0x100) Software: Page Accessed */
+#define _PAGE_PRESENT_BIT  22   /* (0x200) Software: translation valid */
+#define _PAGE_FLUSH_BIT    21   /* (0x400) Software: translation valid */
+				/*             for cache flushing only */
+#define _PAGE_USER_BIT     20   /* (0x800) Software: User accessible page */
+
+/* N.B. The bits are defined in terms of a 32 bit word above, so the */
+/*      following macro is ok for both 32 and 64 bit.                */
+
+#define xlate_pabit(x) (31 - x)
+
+/* this defines the shift to the usable bits in the PTE it is set so
+ * that the valid bits _PAGE_PRESENT_BIT and _PAGE_USER_BIT are set
+ * to zero */
+#define PTE_SHIFT	   	xlate_pabit(_PAGE_USER_BIT)
+
+/* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */
+#define PFN_PTE_SHIFT		12
+
+
+/* this is how many bits may be used by the file functions */
+#define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_SHIFT)
+
+#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT)
+#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE })
+
+#define _PAGE_READ     (1 << xlate_pabit(_PAGE_READ_BIT))
+#define _PAGE_WRITE    (1 << xlate_pabit(_PAGE_WRITE_BIT))
+#define _PAGE_RW       (_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_EXEC     (1 << xlate_pabit(_PAGE_EXEC_BIT))
+#define _PAGE_GATEWAY  (1 << xlate_pabit(_PAGE_GATEWAY_BIT))
+#define _PAGE_DMB      (1 << xlate_pabit(_PAGE_DMB_BIT))
+#define _PAGE_DIRTY    (1 << xlate_pabit(_PAGE_DIRTY_BIT))
+#define _PAGE_REFTRAP  (1 << xlate_pabit(_PAGE_REFTRAP_BIT))
+#define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT))
+#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
+#define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
+#define _PAGE_FLUSH    (1 << xlate_pabit(_PAGE_FLUSH_BIT))
+#define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
+#define _PAGE_FILE     (1 << xlate_pabit(_PAGE_FILE_BIT))
+
+#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_KERNEL	(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+/* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
+ * are page-aligned, we don't care about the PAGE_OFFSET bits, except
+ * for a few meta-information bits, so we shift the address to be
+ * able to effectively address 40/42/44-bits of physical address space
+ * depending on 4k/16k/64k PAGE_SIZE */
+#define _PxD_PRESENT_BIT   31
+#define _PxD_ATTACHED_BIT  30
+#define _PxD_VALID_BIT     29
+
+#define PxD_FLAG_PRESENT  (1 << xlate_pabit(_PxD_PRESENT_BIT))
+#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT))
+#define PxD_FLAG_VALID    (1 << xlate_pabit(_PxD_VALID_BIT))
+#define PxD_FLAG_MASK     (0xf)
+#define PxD_FLAG_SHIFT    (4)
+#define PxD_VALUE_SHIFT   (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */
+
+#ifndef __ASSEMBLY__
+
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED)
+/* Others seem to make this executable, I don't know if that's correct
+   or not.  The stack is mapped this way though so this is necessary
+   in the short term - dhd@linuxcare.com, 2000-08-08 */
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
+#define PAGE_WRITEONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITE | _PAGE_ACCESSED)
+#define PAGE_EXECREAD   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED)
+#define PAGE_COPY       PAGE_EXECREAD
+#define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
+#define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_UNC	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
+#define PAGE_GATEWAY    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
+#define PAGE_FLUSH      __pgprot(_PAGE_FLUSH)
+
+
+/*
+ * We could have an execute only page using "gateway - promote to priv
+ * level 3", but that is kind of silly. So, the way things are defined
+ * now, we must always have read permission for pages with execute
+ * permission. For the fun of it we'll go ahead and support write only
+ * pages.
+ */
+
+	 /*xwr*/
+#define __P000  PAGE_NONE
+#define __P001  PAGE_READONLY
+#define __P010  __P000 /* copy on write */
+#define __P011  __P001 /* copy on write */
+#define __P100  PAGE_EXECREAD
+#define __P101  PAGE_EXECREAD
+#define __P110  __P100 /* copy on write */
+#define __P111  __P101 /* copy on write */
+
+#define __S000  PAGE_NONE
+#define __S001  PAGE_READONLY
+#define __S010  PAGE_WRITEONLY
+#define __S011  PAGE_SHARED
+#define __S100  PAGE_EXECREAD
+#define __S101  PAGE_EXECREAD
+#define __S110  PAGE_RWX
+#define __S111  PAGE_RWX
+
+
+extern pgd_t swapper_pg_dir[]; /* declared in init_task.c */
+
+/* initial page tables for 0-8MB for kernel */
+
+extern pte_t pg0[];
+
+/* zero page used for uninitialized stuff */
+
+extern unsigned long *empty_zero_page;
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#define pte_none(x)     ((pte_val(x) == 0) || (pte_val(x) & _PAGE_FLUSH))
+#define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+
+#define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
+#define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+#define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
+#define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+
+#if PT_NLEVELS == 3
+/* The first entry of the permanent pmd is not there if it contains
+ * the gateway marker */
+#define pmd_none(x)	(!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
+#else
+#define pmd_none(x)	(!pmd_val(x))
+#endif
+#define pmd_bad(x)	(!(pmd_flag(x) & PxD_FLAG_VALID))
+#define pmd_present(x)	(pmd_flag(x) & PxD_FLAG_PRESENT)
+static inline void pmd_clear(pmd_t *pmd) {
+#if PT_NLEVELS == 3
+	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
+		/* This is the entry pointing to the permanent pmd
+		 * attached to the pgd; cannot clear it */
+		__pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
+	else
+#endif
+		__pmd_val_set(*pmd,  0);
+}
+
+
+
+#if PT_NLEVELS == 3
+#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
+#define pgd_page(pgd)	virt_to_page((void *)pgd_page_vaddr(pgd))
+
+/* For 64 bit we have three level tables */
+
+#define pgd_none(x)     (!pgd_val(x))
+#define pgd_bad(x)      (!(pgd_flag(x) & PxD_FLAG_VALID))
+#define pgd_present(x)  (pgd_flag(x) & PxD_FLAG_PRESENT)
+static inline void pgd_clear(pgd_t *pgd) {
+#if PT_NLEVELS == 3
+	if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
+		/* This is the permanent pmd attached to the pgd; cannot
+		 * free it */
+		return;
+#endif
+	__pgd_val_set(*pgd, 0);
+}
+#else
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+static inline void pgd_clear(pgd_t * pgdp)	{ }
+#endif
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte)	{ return 0; }
+
+static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_PAGE_WRITE; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+#define __mk_pte(addr,pgprot) \
+({									\
+	pte_t __pte;							\
+									\
+	pte_val(__pte) = ((((addr)>>PAGE_SHIFT)<<PFN_PTE_SHIFT) + pgprot_val(pgprot));	\
+									\
+	__pte;								\
+})
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	pte_t pte;
+	pte_val(pte) = (pfn << PFN_PTE_SHIFT) | pgprot_val(pgprot);
+	return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
+
+/* Permanent address of a page.  On parisc we don't have highmem. */
+
+#define pte_pfn(x)		(pte_val(x) >> PFN_PTE_SHIFT)
+
+#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
+
+#define pmd_page_vaddr(pmd)	((unsigned long) __va(pmd_address(pmd)))
+
+#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd)))
+#define pmd_page(pmd)	virt_to_page((void *)__pmd_page(pmd))
+
+#define pgd_index(address) ((address) >> PGDIR_SHIFT)
+
+/* to find an entry in a page-table-directory */
+#define pgd_offset(mm, address) \
+((mm)->pgd + ((address) >> PGDIR_SHIFT))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the second-level page table.. */
+
+#if PT_NLEVELS == 3
+#define pmd_offset(dir,address) \
+((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
+#else
+#define pmd_offset(dir,addr) ((pmd_t *) dir)
+#endif
+
+/* Find an entry in the third-level page table.. */ 
+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
+#define pte_offset_kernel(pmd, address) \
+	((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+#define pte_unmap(pte)			do { } while (0)
+#define pte_unmap_nested(pte)		do { } while (0)
+
+extern void paging_init (void);
+
+/* Used for deferring calls to flush_dcache_page() */
+
+#define PG_dcache_dirty         PG_arch_1
+
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
+/* Encode and de-code a swap entry */
+
+#define __swp_type(x)                     ((x).val & 0x1f)
+#define __swp_offset(x)                   ( (((x).val >> 6) &  0x7) | \
+					  (((x).val >> 8) & ~0x7) )
+#define __swp_entry(type, offset)         ((swp_entry_t) { (type) | \
+					    ((offset &  0x7) << 6) | \
+					    ((offset & ~0x7) << 8) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	if (!pte_young(*ptep))
+		return 0;
+	return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), &pte_val(*ptep));
+#else
+	pte_t pte = *ptep;
+	if (!pte_young(pte))
+		return 0;
+	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return 1;
+#endif
+}
+
+extern spinlock_t pa_dbit_lock;
+
+struct mm_struct;
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t old_pte;
+	pte_t pte;
+
+	spin_lock(&pa_dbit_lock);
+	pte = old_pte = *ptep;
+	pte_val(pte) &= ~_PAGE_PRESENT;
+	pte_val(pte) |= _PAGE_FLUSH;
+	set_pte_at(mm,addr,ptep,pte);
+	spin_unlock(&pa_dbit_lock);
+
+	return old_pte;
+}
+
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	unsigned long new, old;
+
+	do {
+		old = pte_val(*ptep);
+		new = pte_val(pte_wrprotect(__pte (old)));
+	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
+#else
+	pte_t old_pte = *ptep;
+	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+#endif
+}
+
+#define pte_same(A,B)	(pte_val(A) == pte_val(B))
+
+#endif /* !__ASSEMBLY__ */
+
+
+/* TLB page size encoding - see table 3-1 in parisc20.pdf */
+#define _PAGE_SIZE_ENCODING_4K		0
+#define _PAGE_SIZE_ENCODING_16K		1
+#define _PAGE_SIZE_ENCODING_64K		2
+#define _PAGE_SIZE_ENCODING_256K	3
+#define _PAGE_SIZE_ENCODING_1M		4
+#define _PAGE_SIZE_ENCODING_4M		5
+#define _PAGE_SIZE_ENCODING_16M		6
+#define _PAGE_SIZE_ENCODING_64M		7
+
+#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4K
+#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
+# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16K
+#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
+# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_64K
+#endif
+
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+		remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define pgprot_noncached(prot) __pgprot(pgprot_val(prot) | _PAGE_NO_CACHE)
+
+/* We provide our own get_unmapped_area to provide cache coherency */
+
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTE_SAME
+#include <asm-generic/pgtable.h>
+
+#endif /* _PARISC_PGTABLE_H */
diff --git a/arch/parisc/include/asm/poll.h b/arch/parisc/include/asm/poll.h
new file mode 100644
index 000000000000..c98509d3149e
--- /dev/null
+++ b/arch/parisc/include/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h
new file mode 100644
index 000000000000..bb725a6630bb
--- /dev/null
+++ b/arch/parisc/include/asm/posix_types.h
@@ -0,0 +1,129 @@
+#ifndef __ARCH_PARISC_POSIX_TYPES_H
+#define __ARCH_PARISC_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+typedef unsigned long		__kernel_ino_t;
+typedef unsigned short		__kernel_mode_t;
+typedef unsigned short		__kernel_nlink_t;
+typedef long			__kernel_off_t;
+typedef int			__kernel_pid_t;
+typedef unsigned short		__kernel_ipc_pid_t;
+typedef unsigned int		__kernel_uid_t;
+typedef unsigned int		__kernel_gid_t;
+typedef int			__kernel_suseconds_t;
+typedef long			__kernel_clock_t;
+typedef int			__kernel_timer_t;
+typedef int			__kernel_clockid_t;
+typedef int			__kernel_daddr_t;
+/* Note these change from narrow to wide kernels */
+#ifdef CONFIG_64BIT
+typedef unsigned long		__kernel_size_t;
+typedef long			__kernel_ssize_t;
+typedef long			__kernel_ptrdiff_t;
+typedef long			__kernel_time_t;
+#else
+typedef unsigned int		__kernel_size_t;
+typedef int			__kernel_ssize_t;
+typedef int			__kernel_ptrdiff_t;
+typedef long			__kernel_time_t;
+#endif
+typedef char *			__kernel_caddr_t;
+
+typedef unsigned short		__kernel_uid16_t;
+typedef unsigned short		__kernel_gid16_t;
+typedef unsigned int		__kernel_uid32_t;
+typedef unsigned int		__kernel_gid32_t;
+
+#ifdef __GNUC__
+typedef long long		__kernel_loff_t;
+typedef long long		__kernel_off64_t;
+typedef unsigned long long	__kernel_ino64_t;
+#endif
+
+typedef unsigned int		__kernel_old_dev_t;
+
+typedef struct {
+	int	val[2];
+} __kernel_fsid_t;
+
+/* compatibility stuff */
+typedef __kernel_uid_t __kernel_old_uid_t;
+typedef __kernel_gid_t __kernel_old_gid_t;
+
+#if defined(__KERNEL__)
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{ 
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *__p)
+{
+	unsigned long *__tmp = __p->fds_bits;
+	int __i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+		case 16:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			__tmp[ 8] = 0; __tmp[ 9] = 0;
+			__tmp[10] = 0; __tmp[11] = 0;
+			__tmp[12] = 0; __tmp[13] = 0;
+			__tmp[14] = 0; __tmp[15] = 0;
+			return;
+
+		case 8:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			return;
+
+		case 4:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			return;
+		}
+	}
+	__i = __FDSET_LONGS;
+	while (__i) {
+		__i--;
+		*__tmp = 0;
+		__tmp++;
+	}
+}
+
+#endif /* defined(__KERNEL__) */
+
+#endif
diff --git a/arch/parisc/include/asm/prefetch.h b/arch/parisc/include/asm/prefetch.h
new file mode 100644
index 000000000000..c5edc60c059f
--- /dev/null
+++ b/arch/parisc/include/asm/prefetch.h
@@ -0,0 +1,39 @@
+/*
+ * include/asm-parisc/prefetch.h
+ *
+ * PA 2.0 defines data prefetch instructions on page 6-11 of the Kane book.
+ * In addition, many implementations do hardware prefetching of both
+ * instructions and data.
+ *
+ * PA7300LC (page 14-4 of the ERS) also implements prefetching by a load
+ * to gr0 but not in a way that Linux can use.  If the load would cause an
+ * interruption (eg due to prefetching 0), it is suppressed on PA2.0
+ * processors, but not on 7300LC.
+ *
+ */
+
+#ifndef __ASM_PARISC_PREFETCH_H
+#define __ASM_PARISC_PREFETCH_H
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PREFETCH
+
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *addr)
+{
+	__asm__("ldw 0(%0), %%r0" : : "r" (addr));
+}
+
+/* LDD is a PA2.0 addition. */
+#ifdef CONFIG_PA20
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *addr)
+{
+	__asm__("ldd 0(%0), %%r0" : : "r" (addr));
+}
+#endif /* CONFIG_PA20 */
+
+#endif /* CONFIG_PREFETCH */
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_PARISC_PROCESSOR_H */
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
new file mode 100644
index 000000000000..3c9d34844c83
--- /dev/null
+++ b/arch/parisc/include/asm/processor.h
@@ -0,0 +1,357 @@
+/*
+ * include/asm-parisc/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ * Copyright (C) 2001 Grant Grundler
+ */
+
+#ifndef __ASM_PARISC_PROCESSOR_H
+#define __ASM_PARISC_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+#include <linux/threads.h>
+
+#include <asm/prefetch.h>
+#include <asm/hardware.h>
+#include <asm/pdc.h>
+#include <asm/ptrace.h>
+#include <asm/types.h>
+#include <asm/system.h>
+#endif /* __ASSEMBLY__ */
+
+#define KERNEL_STACK_SIZE 	(4*PAGE_SIZE)
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#ifdef CONFIG_PA20
+#define current_ia(x)	__asm__("mfia %0" : "=r"(x))
+#else /* mfia added in pa2.0 */
+#define current_ia(x)	__asm__("blr 0,%0\n\tnop" : "=r"(x))
+#endif
+#define current_text_addr() ({ void *pc; current_ia(pc); pc; })
+
+#define TASK_SIZE_OF(tsk)       ((tsk)->thread.task_size)
+#define TASK_SIZE	        TASK_SIZE_OF(current)
+#define TASK_UNMAPPED_BASE      (current->thread.map_base)
+
+#define DEFAULT_TASK_SIZE32	(0xFFF00000UL)
+#define DEFAULT_MAP_BASE32	(0x40000000UL)
+
+#ifdef CONFIG_64BIT
+#define DEFAULT_TASK_SIZE       (MAX_ADDRESS-0xf000000)
+#define DEFAULT_MAP_BASE        (0x200000000UL)
+#else
+#define DEFAULT_TASK_SIZE	DEFAULT_TASK_SIZE32
+#define DEFAULT_MAP_BASE	DEFAULT_MAP_BASE32
+#endif
+
+#ifdef __KERNEL__
+
+/* XXX: STACK_TOP actually should be STACK_BOTTOM for parisc.
+ * prumpf */
+
+#define STACK_TOP	TASK_SIZE
+#define STACK_TOP_MAX	DEFAULT_TASK_SIZE
+
+#endif
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Data detected about CPUs at boot time which is the same for all CPU's.
+ * HP boxes are SMP - ie identical processors.
+ *
+ * FIXME: some CPU rev info may be processor specific...
+ */
+struct system_cpuinfo_parisc {
+	unsigned int	cpu_count;
+	unsigned int	cpu_hz;
+	unsigned int	hversion;
+	unsigned int	sversion;
+	enum cpu_type	cpu_type;
+
+	struct {
+		struct pdc_model model;
+		unsigned long versions;
+		unsigned long cpuid;
+		unsigned long capabilities;
+		char   sys_model_name[81]; /* PDC-ROM returnes this model name */
+	} pdc;
+
+	const char	*cpu_name;	/* e.g. "PA7300LC (PCX-L2)" */
+	const char	*family_name;	/* e.g. "1.1e" */
+};
+
+
+/* Per CPU data structure - ie varies per CPU.  */
+struct cpuinfo_parisc {
+	unsigned long it_value;     /* Interval Timer at last timer Intr */
+	unsigned long it_delta;     /* Interval delta (tic_10ms / HZ * 100) */
+	unsigned long irq_count;    /* number of IRQ's since boot */
+	unsigned long irq_max_cr16; /* longest time to handle a single IRQ */
+	unsigned long cpuid;        /* aka slot_number or set to NO_PROC_ID */
+	unsigned long hpa;          /* Host Physical address */
+	unsigned long txn_addr;     /* MMIO addr of EIR or id_eid */
+#ifdef CONFIG_SMP
+	unsigned long pending_ipi;  /* bitmap of type ipi_message_type */
+	unsigned long ipi_count;    /* number ipi Interrupts */
+#endif
+	unsigned long bh_count;     /* number of times bh was invoked */
+	unsigned long prof_counter; /* per CPU profiling support */
+	unsigned long prof_multiplier;	/* per CPU profiling support */
+	unsigned long fp_rev;
+	unsigned long fp_model;
+	unsigned int state;
+	struct parisc_device *dev;
+	unsigned long loops_per_jiffy;
+};
+
+extern struct system_cpuinfo_parisc boot_cpu_data;
+extern struct cpuinfo_parisc cpu_data[NR_CPUS];
+#define current_cpu_data cpu_data[smp_processor_id()]
+
+#define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
+
+typedef struct {
+	int seg;  
+} mm_segment_t;
+
+#define ARCH_MIN_TASKALIGN	8
+
+struct thread_struct {
+	struct pt_regs regs;
+	unsigned long  task_size;
+	unsigned long  map_base;
+	unsigned long  flags;
+}; 
+
+/* Thread struct flags. */
+#define PARISC_UAC_NOPRINT	(1UL << 0)	/* see prctl and unaligned.c */
+#define PARISC_UAC_SIGBUS	(1UL << 1)
+#define PARISC_KERNEL_DEATH	(1UL << 31)	/* see die_if_kernel()... */
+
+#define PARISC_UAC_SHIFT	0
+#define PARISC_UAC_MASK		(PARISC_UAC_NOPRINT|PARISC_UAC_SIGBUS)
+
+#define SET_UNALIGN_CTL(task,value)                                       \
+        ({                                                                \
+        (task)->thread.flags = (((task)->thread.flags & ~PARISC_UAC_MASK) \
+                                | (((value) << PARISC_UAC_SHIFT) &        \
+                                   PARISC_UAC_MASK));                     \
+        0;                                                                \
+        })
+
+#define GET_UNALIGN_CTL(task,addr)                                        \
+        ({                                                                \
+        put_user(((task)->thread.flags & PARISC_UAC_MASK)                 \
+                 >> PARISC_UAC_SHIFT, (int __user *) (addr));             \
+        })
+
+#define INIT_THREAD { \
+	.regs = {	.gr	= { 0, }, \
+			.fr	= { 0, }, \
+			.sr	= { 0, }, \
+			.iasq	= { 0, }, \
+			.iaoq	= { 0, }, \
+			.cr27	= 0, \
+		}, \
+	.task_size	= DEFAULT_TASK_SIZE, \
+	.map_base	= DEFAULT_MAP_BASE, \
+	.flags		= 0 \
+	}
+
+/*
+ * Return saved PC of a blocked thread.  This is used by ps mostly.
+ */
+
+unsigned long thread_saved_pc(struct task_struct *t);
+void show_trace(struct task_struct *task, unsigned long *stack);
+
+/*
+ * Start user thread in another space.
+ *
+ * Note that we set both the iaoq and r31 to the new pc. When
+ * the kernel initially calls execve it will return through an
+ * rfi path that will use the values in the iaoq. The execve
+ * syscall path will return through the gateway page, and
+ * that uses r31 to branch to.
+ *
+ * For ELF we clear r23, because the dynamic linker uses it to pass
+ * the address of the finalizer function.
+ *
+ * We also initialize sr3 to an illegal value (illegal for our
+ * implementation, not for the architecture).
+ */
+typedef unsigned int elf_caddr_t;
+
+#define start_thread_som(regs, new_pc, new_sp) do {	\
+	unsigned long *sp = (unsigned long *)new_sp;	\
+	__u32 spaceid = (__u32)current->mm->context;	\
+	unsigned long pc = (unsigned long)new_pc;	\
+	/* offset pc for priv. level */			\
+	pc |= 3;					\
+							\
+	set_fs(USER_DS);				\
+	regs->iasq[0] = spaceid;			\
+	regs->iasq[1] = spaceid;			\
+	regs->iaoq[0] = pc;				\
+	regs->iaoq[1] = pc + 4;                         \
+	regs->sr[2] = LINUX_GATEWAY_SPACE;              \
+	regs->sr[3] = 0xffff;				\
+	regs->sr[4] = spaceid;				\
+	regs->sr[5] = spaceid;				\
+	regs->sr[6] = spaceid;				\
+	regs->sr[7] = spaceid;				\
+	regs->gr[ 0] = USER_PSW;                        \
+	regs->gr[30] = ((new_sp)+63)&~63;		\
+	regs->gr[31] = pc;				\
+							\
+	get_user(regs->gr[26],&sp[0]);			\
+	get_user(regs->gr[25],&sp[-1]); 		\
+	get_user(regs->gr[24],&sp[-2]); 		\
+	get_user(regs->gr[23],&sp[-3]); 		\
+} while(0)
+
+/* The ELF abi wants things done a "wee bit" differently than
+ * som does.  Supporting this behavior here avoids
+ * having our own version of create_elf_tables.
+ *
+ * Oh, and yes, that is not a typo, we are really passing argc in r25
+ * and argv in r24 (rather than r26 and r25).  This is because that's
+ * where __libc_start_main wants them.
+ *
+ * Duplicated from dl-machine.h for the benefit of readers:
+ *
+ *  Our initial stack layout is rather different from everyone else's
+ *  due to the unique PA-RISC ABI.  As far as I know it looks like
+ *  this:
+
+   -----------------------------------  (user startup code creates this frame)
+   |         32 bytes of magic       |
+   |---------------------------------|
+   | 32 bytes argument/sp save area  |
+   |---------------------------------| (bprm->p)
+   |	    ELF auxiliary info	     |
+   |         (up to 28 words)        |
+   |---------------------------------|
+   |		   NULL		     |
+   |---------------------------------|
+   |	   Environment pointers	     |
+   |---------------------------------|
+   |		   NULL		     |
+   |---------------------------------|
+   |        Argument pointers        |
+   |---------------------------------| <- argv
+   |          argc (1 word)          |
+   |---------------------------------| <- bprm->exec (HACK!)
+   |         N bytes of slack        |
+   |---------------------------------|
+   |	filename passed to execve    |
+   |---------------------------------| (mm->env_end)
+   |           env strings           |
+   |---------------------------------| (mm->env_start, mm->arg_end)
+   |           arg strings           |
+   |---------------------------------|
+   | additional faked arg strings if |
+   | we're invoked via binfmt_script |
+   |---------------------------------| (mm->arg_start)
+   stack base is at TASK_SIZE - rlim_max.
+
+on downward growing arches, it looks like this:
+   stack base at TASK_SIZE
+   | filename passed to execve
+   | env strings
+   | arg strings
+   | faked arg strings
+   | slack
+   | ELF
+   | envps
+   | argvs
+   | argc
+
+ *  The pleasant part of this is that if we need to skip arguments we
+ *  can just decrement argc and move argv, because the stack pointer
+ *  is utterly unrelated to the location of the environment and
+ *  argument vectors.
+ *
+ * Note that the S/390 people took the easy way out and hacked their
+ * GCC to make the stack grow downwards.
+ *
+ * Final Note: For entry from syscall, the W (wide) bit of the PSW
+ * is stuffed into the lowest bit of the user sp (%r30), so we fill
+ * it in here from the current->personality
+ */
+
+#ifdef CONFIG_64BIT
+#define USER_WIDE_MODE	(!test_thread_flag(TIF_32BIT))
+#else
+#define USER_WIDE_MODE	0
+#endif
+
+#define start_thread(regs, new_pc, new_sp) do {		\
+	elf_addr_t *sp = (elf_addr_t *)new_sp;		\
+	__u32 spaceid = (__u32)current->mm->context;	\
+	elf_addr_t pc = (elf_addr_t)new_pc | 3;		\
+	elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1;	\
+							\
+	set_fs(USER_DS);				\
+	regs->iasq[0] = spaceid;			\
+	regs->iasq[1] = spaceid;			\
+	regs->iaoq[0] = pc;				\
+	regs->iaoq[1] = pc + 4;                         \
+	regs->sr[2] = LINUX_GATEWAY_SPACE;              \
+	regs->sr[3] = 0xffff;				\
+	regs->sr[4] = spaceid;				\
+	regs->sr[5] = spaceid;				\
+	regs->sr[6] = spaceid;				\
+	regs->sr[7] = spaceid;				\
+	regs->gr[ 0] = USER_PSW | (USER_WIDE_MODE ? PSW_W : 0); \
+	regs->fr[ 0] = 0LL;                            	\
+	regs->fr[ 1] = 0LL;                            	\
+	regs->fr[ 2] = 0LL;                            	\
+	regs->fr[ 3] = 0LL;                            	\
+	regs->gr[30] = (((unsigned long)sp + 63) &~ 63) | (USER_WIDE_MODE ? 1 : 0); \
+	regs->gr[31] = pc;				\
+							\
+	get_user(regs->gr[25], (argv - 1));		\
+	regs->gr[24] = (long) argv;			\
+	regs->gr[23] = 0;				\
+} while(0)
+
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+extern void map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm);
+
+extern unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk)	((tsk)->thread.regs.iaoq[0])
+#define KSTK_ESP(tsk)	((tsk)->thread.regs.gr[30])
+
+#define cpu_relax()	barrier()
+
+/* Used as a macro to identify the combined VIPT/PIPT cached
+ * CPUs which require a guarantee of coherency (no inequivalent
+ * aliases with different data, whether clean or not) to operate */
+static inline int parisc_requires_coherency(void)
+{
+#ifdef CONFIG_PA8X00
+	return (boot_cpu_data.cpu_type == mako) ||
+		(boot_cpu_data.cpu_type == mako2);
+#else
+	return 0;
+#endif
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_PARISC_PROCESSOR_H */
diff --git a/arch/parisc/include/asm/psw.h b/arch/parisc/include/asm/psw.h
new file mode 100644
index 000000000000..5a3e23c9ce63
--- /dev/null
+++ b/arch/parisc/include/asm/psw.h
@@ -0,0 +1,62 @@
+#ifndef _PARISC_PSW_H
+
+
+#define	PSW_I	0x00000001
+#define	PSW_D	0x00000002
+#define	PSW_P	0x00000004
+#define	PSW_Q	0x00000008
+
+#define	PSW_R	0x00000010
+#define	PSW_F	0x00000020
+#define	PSW_G	0x00000040	/* PA1.x only */
+#define PSW_O	0x00000080	/* PA2.0 only */
+
+/* ssm/rsm instructions number PSW_W and PSW_E differently */
+#define PSW_SM_I	PSW_I	/* Enable External Interrupts */
+#define PSW_SM_D	PSW_D
+#define PSW_SM_P	PSW_P
+#define PSW_SM_Q	PSW_Q	/* Enable Interrupt State Collection */
+#define PSW_SM_R	PSW_R	/* Enable Recover Counter Trap */
+#define PSW_SM_W	0x200	/* PA2.0 only : Enable Wide Mode */
+
+#define PSW_SM_QUIET	PSW_SM_R+PSW_SM_Q+PSW_SM_P+PSW_SM_D+PSW_SM_I
+
+#define PSW_CB	0x0000ff00
+
+#define	PSW_M	0x00010000
+#define	PSW_V	0x00020000
+#define	PSW_C	0x00040000
+#define	PSW_B	0x00080000
+
+#define	PSW_X	0x00100000
+#define	PSW_N	0x00200000
+#define	PSW_L	0x00400000
+#define	PSW_H	0x00800000
+
+#define	PSW_T	0x01000000
+#define	PSW_S	0x02000000
+#define	PSW_E	0x04000000
+#define PSW_W	0x08000000	/* PA2.0 only */
+#define PSW_W_BIT       36      /* PA2.0 only */
+
+#define	PSW_Z	0x40000000	/* PA1.x only */
+#define	PSW_Y	0x80000000	/* PA1.x only */
+
+#ifdef CONFIG_64BIT
+#  define PSW_HI_CB 0x000000ff    /* PA2.0 only */
+#endif
+
+#ifdef CONFIG_64BIT
+#  define USER_PSW_HI_MASK	PSW_HI_CB
+#  define WIDE_PSW		PSW_W
+#else 
+#  define WIDE_PSW		0
+#endif
+
+/* Used when setting up for rfi */
+#define KERNEL_PSW    (WIDE_PSW | PSW_C | PSW_Q | PSW_P | PSW_D)
+#define REAL_MODE_PSW (WIDE_PSW | PSW_Q)
+#define USER_PSW_MASK (WIDE_PSW | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
+#define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
+
+#endif
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
new file mode 100644
index 000000000000..3e94c5d85ff5
--- /dev/null
+++ b/arch/parisc/include/asm/ptrace.h
@@ -0,0 +1,58 @@
+#ifndef _PARISC_PTRACE_H
+#define _PARISC_PTRACE_H
+
+/* written by Philipp Rumpf, Copyright (C) 1999 SuSE GmbH Nuernberg
+** Copyright (C) 2000 Grant Grundler, Hewlett-Packard
+*/
+
+#include <linux/types.h>
+
+/* This struct defines the way the registers are stored on the 
+ * stack during a system call.
+ *
+ * N.B. gdb/strace care about the size and offsets within this
+ * structure. If you change things, you may break object compatibility
+ * for those applications.
+ */
+
+struct pt_regs {
+	unsigned long gr[32];	/* PSW is in gr[0] */
+	__u64 fr[32];
+	unsigned long sr[ 8];
+	unsigned long iasq[2];
+	unsigned long iaoq[2];
+	unsigned long cr27;
+	unsigned long pad0;     /* available for other uses */
+	unsigned long orig_r28;
+	unsigned long ksp;
+	unsigned long kpc;
+	unsigned long sar;	/* CR11 */
+	unsigned long iir;	/* CR19 */
+	unsigned long isr;	/* CR20 */
+	unsigned long ior;	/* CR21 */
+	unsigned long ipsw;	/* CR22 */
+};
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ *
+ * These ones are taken from IA-64 on the assumption that theirs are
+ * the most correct (and we also want to support PTRACE_SINGLEBLOCK
+ * since we have taken branch traps too)
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
+#ifdef __KERNEL__
+
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
+/* XXX should we use iaoq[1] or iaoq[0] ? */
+#define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
+#define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
+#define instruction_pointer(regs)	((regs)->iaoq[0] & ~3)
+unsigned long profile_pc(struct pt_regs *);
+extern void show_regs(struct pt_regs *);
+#endif
+
+#endif
diff --git a/arch/parisc/include/asm/real.h b/arch/parisc/include/asm/real.h
new file mode 100644
index 000000000000..82acb25db395
--- /dev/null
+++ b/arch/parisc/include/asm/real.h
@@ -0,0 +1,5 @@
+#ifndef _PARISC_REAL_H
+#define _PARISC_REAL_H
+
+
+#endif
diff --git a/arch/parisc/include/asm/resource.h b/arch/parisc/include/asm/resource.h
new file mode 100644
index 000000000000..8b06343b62ed
--- /dev/null
+++ b/arch/parisc/include/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_PARISC_RESOURCE_H
+#define _ASM_PARISC_RESOURCE_H
+
+#define _STK_LIM_MAX	10 * _STK_LIM
+#include <asm-generic/resource.h>
+
+#endif
diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h
new file mode 100644
index 000000000000..09f51d5ab57c
--- /dev/null
+++ b/arch/parisc/include/asm/ropes.h
@@ -0,0 +1,322 @@
+#ifndef _ASM_PARISC_ROPES_H_
+#define _ASM_PARISC_ROPES_H_
+
+#include <asm/parisc-device.h>
+
+#ifdef CONFIG_64BIT
+/* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
+#define ZX1_SUPPORT
+#endif
+
+#ifdef CONFIG_PROC_FS
+/* depends on proc fs support. But costs CPU performance */
+#undef SBA_COLLECT_STATS
+#endif
+
+/*
+** The number of pdir entries to "free" before issuing
+** a read to PCOM register to flush out PCOM writes.
+** Interacts with allocation granularity (ie 4 or 8 entries
+** allocated and free'd/purged at a time might make this
+** less interesting).
+*/
+#define DELAYED_RESOURCE_CNT	16
+
+#define MAX_IOC		2	/* per Ike. Pluto/Astro only have 1. */
+#define ROPES_PER_IOC	8	/* per Ike half or Pluto/Astro */
+
+struct ioc {
+	void __iomem	*ioc_hpa;	/* I/O MMU base address */
+	char		*res_map;	/* resource map, bit == pdir entry */
+	u64		*pdir_base;	/* physical base address */
+	unsigned long	ibase;		/* pdir IOV Space base - shared w/lba_pci */
+	unsigned long	imask;		/* pdir IOV Space mask - shared w/lba_pci */
+#ifdef ZX1_SUPPORT
+	unsigned long	iovp_mask;	/* help convert IOVA to IOVP */
+#endif
+	unsigned long	*res_hint;	/* next avail IOVP - circular search */
+	spinlock_t	res_lock;
+	unsigned int	res_bitshift;	/* from the LEFT! */
+	unsigned int	res_size;	/* size of resource map in bytes */
+#ifdef SBA_HINT_SUPPORT
+/* FIXME : DMA HINTs not used */
+	unsigned long	hint_mask_pdir; /* bits used for DMA hints */
+	unsigned int	hint_shift_pdir;
+#endif
+#if DELAYED_RESOURCE_CNT > 0
+	int		saved_cnt;
+	struct sba_dma_pair {
+			dma_addr_t	iova;
+			size_t		size;
+        } saved[DELAYED_RESOURCE_CNT];
+#endif
+
+#ifdef SBA_COLLECT_STATS
+#define SBA_SEARCH_SAMPLE	0x100
+	unsigned long	avg_search[SBA_SEARCH_SAMPLE];
+	unsigned long	avg_idx;	/* current index into avg_search */
+	unsigned long	used_pages;
+	unsigned long	msingle_calls;
+	unsigned long	msingle_pages;
+	unsigned long	msg_calls;
+	unsigned long	msg_pages;
+	unsigned long	usingle_calls;
+	unsigned long	usingle_pages;
+	unsigned long	usg_calls;
+	unsigned long	usg_pages;
+#endif
+        /* STUFF We don't need in performance path */
+	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
+};
+
+struct sba_device {
+	struct sba_device	*next;  /* list of SBA's in system */
+	struct parisc_device	*dev;   /* dev found in bus walk */
+	const char		*name;
+	void __iomem		*sba_hpa; /* base address */
+	spinlock_t		sba_lock;
+	unsigned int		flags;  /* state/functionality enabled */
+	unsigned int		hw_rev;  /* HW revision of chip */
+
+	struct resource		chip_resv; /* MMIO reserved for chip */
+	struct resource		iommu_resv; /* MMIO reserved for iommu */
+
+	unsigned int		num_ioc;  /* number of on-board IOC's */
+	struct ioc		ioc[MAX_IOC];
+};
+
+#define ASTRO_RUNWAY_PORT	0x582
+#define IKE_MERCED_PORT		0x803
+#define REO_MERCED_PORT		0x804
+#define REOG_MERCED_PORT	0x805
+#define PLUTO_MCKINLEY_PORT	0x880
+
+static inline int IS_ASTRO(struct parisc_device *d) {
+	return d->id.hversion == ASTRO_RUNWAY_PORT;
+}
+
+static inline int IS_IKE(struct parisc_device *d) {
+	return d->id.hversion == IKE_MERCED_PORT;
+}
+
+static inline int IS_PLUTO(struct parisc_device *d) {
+	return d->id.hversion == PLUTO_MCKINLEY_PORT;
+}
+
+#define PLUTO_IOVA_BASE	(1UL*1024*1024*1024)	/* 1GB */
+#define PLUTO_IOVA_SIZE	(1UL*1024*1024*1024)	/* 1GB */
+#define PLUTO_GART_SIZE	(PLUTO_IOVA_SIZE / 2)
+
+#define SBA_PDIR_VALID_BIT	0x8000000000000000ULL
+
+#define SBA_AGPGART_COOKIE	0x0000badbadc0ffeeULL
+
+#define SBA_FUNC_ID	0x0000	/* function id */
+#define SBA_FCLASS	0x0008	/* function class, bist, header, rev... */
+
+#define SBA_FUNC_SIZE 4096   /* SBA configuration function reg set */
+
+#define ASTRO_IOC_OFFSET	(32 * SBA_FUNC_SIZE)
+#define PLUTO_IOC_OFFSET	(1 * SBA_FUNC_SIZE)
+/* Ike's IOC's occupy functions 2 and 3 */
+#define IKE_IOC_OFFSET(p)	((p+2) * SBA_FUNC_SIZE)
+
+#define IOC_CTRL          0x8	/* IOC_CTRL offset */
+#define IOC_CTRL_TC       (1 << 0) /* TOC Enable */
+#define IOC_CTRL_CE       (1 << 1) /* Coalesce Enable */
+#define IOC_CTRL_DE       (1 << 2) /* Dillon Enable */
+#define IOC_CTRL_RM       (1 << 8) /* Real Mode */
+#define IOC_CTRL_NC       (1 << 9) /* Non Coherent Mode */
+#define IOC_CTRL_D4       (1 << 11) /* Disable 4-byte coalescing */
+#define IOC_CTRL_DD       (1 << 13) /* Disable distr. LMMIO range coalescing */
+
+/*
+** Offsets into MBIB (Function 0 on Ike and hopefully Astro)
+** Firmware programs this stuff. Don't touch it.
+*/
+#define LMMIO_DIRECT0_BASE  0x300
+#define LMMIO_DIRECT0_MASK  0x308
+#define LMMIO_DIRECT0_ROUTE 0x310
+
+#define LMMIO_DIST_BASE  0x360
+#define LMMIO_DIST_MASK  0x368
+#define LMMIO_DIST_ROUTE 0x370
+
+#define IOS_DIST_BASE	0x390
+#define IOS_DIST_MASK	0x398
+#define IOS_DIST_ROUTE	0x3A0
+
+#define IOS_DIRECT_BASE	0x3C0
+#define IOS_DIRECT_MASK	0x3C8
+#define IOS_DIRECT_ROUTE 0x3D0
+
+/*
+** Offsets into I/O TLB (Function 2 and 3 on Ike)
+*/
+#define ROPE0_CTL	0x200  /* "regbus pci0" */
+#define ROPE1_CTL	0x208
+#define ROPE2_CTL	0x210
+#define ROPE3_CTL	0x218
+#define ROPE4_CTL	0x220
+#define ROPE5_CTL	0x228
+#define ROPE6_CTL	0x230
+#define ROPE7_CTL	0x238
+
+#define IOC_ROPE0_CFG	0x500	/* pluto only */
+#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */
+
+#define HF_ENABLE	0x40
+
+#define IOC_IBASE	0x300	/* IO TLB */
+#define IOC_IMASK	0x308
+#define IOC_PCOM	0x310
+#define IOC_TCNFG	0x318
+#define IOC_PDIR_BASE	0x320
+
+/*
+** IOC supports 4/8/16/64KB page sizes (see TCNFG register)
+** It's safer (avoid memory corruption) to keep DMA page mappings
+** equivalently sized to VM PAGE_SIZE.
+**
+** We really can't avoid generating a new mapping for each
+** page since the Virtual Coherence Index has to be generated
+** and updated for each page.
+**
+** PAGE_SIZE could be greater than IOVP_SIZE. But not the inverse.
+*/
+#define IOVP_SIZE	PAGE_SIZE
+#define IOVP_SHIFT	PAGE_SHIFT
+#define IOVP_MASK	PAGE_MASK
+
+#define SBA_PERF_CFG	0x708	/* Performance Counter stuff */
+#define SBA_PERF_MASK1	0x718
+#define SBA_PERF_MASK2	0x730
+
+/*
+** Offsets into PCI Performance Counters (functions 12 and 13)
+** Controlled by PERF registers in function 2 & 3 respectively.
+*/
+#define SBA_PERF_CNT1	0x200
+#define SBA_PERF_CNT2	0x208
+#define SBA_PERF_CNT3	0x210
+
+/*
+** lba_device: Per instance Elroy data structure
+*/
+struct lba_device {
+	struct pci_hba_data	hba;
+
+	spinlock_t		lba_lock;
+	void			*iosapic_obj;
+
+#ifdef CONFIG_64BIT
+	void __iomem		*iop_base;	/* PA_VIEW - for IO port accessor funcs */
+#endif
+
+	int			flags;		/* state/functionality enabled */
+	int			hw_rev;		/* HW revision of chip */
+};
+
+#define ELROY_HVERS		0x782
+#define MERCURY_HVERS		0x783
+#define QUICKSILVER_HVERS	0x784
+
+static inline int IS_ELROY(struct parisc_device *d) {
+	return (d->id.hversion == ELROY_HVERS);
+}
+
+static inline int IS_MERCURY(struct parisc_device *d) {
+	return (d->id.hversion == MERCURY_HVERS);
+}
+
+static inline int IS_QUICKSILVER(struct parisc_device *d) {
+	return (d->id.hversion == QUICKSILVER_HVERS);
+}
+
+static inline int agp_mode_mercury(void __iomem *hpa) {
+	u64 bus_mode;
+
+	bus_mode = readl(hpa + 0x0620);
+	if (bus_mode & 1)
+		return 1;
+
+	return 0;
+}
+
+/*
+** I/O SAPIC init function
+** Caller knows where an I/O SAPIC is. LBA has an integrated I/O SAPIC.
+** Call setup as part of per instance initialization.
+** (ie *not* init_module() function unless only one is present.)
+** fixup_irq is to initialize PCI IRQ line support and
+** virtualize pcidev->irq value. To be called by pci_fixup_bus().
+*/
+extern void *iosapic_register(unsigned long hpa);
+extern int iosapic_fixup_irq(void *obj, struct pci_dev *pcidev);
+
+#define LBA_FUNC_ID	0x0000	/* function id */
+#define LBA_FCLASS	0x0008	/* function class, bist, header, rev... */
+#define LBA_CAPABLE	0x0030	/* capabilities register */
+
+#define LBA_PCI_CFG_ADDR	0x0040	/* poke CFG address here */
+#define LBA_PCI_CFG_DATA	0x0048	/* read or write data here */
+
+#define LBA_PMC_MTLT	0x0050	/* Firmware sets this - read only. */
+#define LBA_FW_SCRATCH	0x0058	/* Firmware writes the PCI bus number here. */
+#define LBA_ERROR_ADDR	0x0070	/* On error, address gets logged here */
+
+#define LBA_ARB_MASK	0x0080	/* bit 0 enable arbitration. PAT/PDC enables */
+#define LBA_ARB_PRI	0x0088	/* firmware sets this. */
+#define LBA_ARB_MODE	0x0090	/* firmware sets this. */
+#define LBA_ARB_MTLT	0x0098	/* firmware sets this. */
+
+#define LBA_MOD_ID	0x0100	/* Module ID. PDC_PAT_CELL reports 4 */
+
+#define LBA_STAT_CTL	0x0108	/* Status & Control */
+#define   LBA_BUS_RESET		0x01	/*  Deassert PCI Bus Reset Signal */
+#define   CLEAR_ERRLOG		0x10	/*  "Clear Error Log" cmd */
+#define   CLEAR_ERRLOG_ENABLE	0x20	/*  "Clear Error Log" Enable */
+#define   HF_ENABLE	0x40	/*    enable HF mode (default is -1 mode) */
+
+#define LBA_LMMIO_BASE	0x0200	/* < 4GB I/O address range */
+#define LBA_LMMIO_MASK	0x0208
+
+#define LBA_GMMIO_BASE	0x0210	/* > 4GB I/O address range */
+#define LBA_GMMIO_MASK	0x0218
+
+#define LBA_WLMMIO_BASE	0x0220	/* All < 4GB ranges under the same *SBA* */
+#define LBA_WLMMIO_MASK	0x0228
+
+#define LBA_WGMMIO_BASE	0x0230	/* All > 4GB ranges under the same *SBA* */
+#define LBA_WGMMIO_MASK	0x0238
+
+#define LBA_IOS_BASE	0x0240	/* I/O port space for this LBA */
+#define LBA_IOS_MASK	0x0248
+
+#define LBA_ELMMIO_BASE	0x0250	/* Extra LMMIO range */
+#define LBA_ELMMIO_MASK	0x0258
+
+#define LBA_EIOS_BASE	0x0260	/* Extra I/O port space */
+#define LBA_EIOS_MASK	0x0268
+
+#define LBA_GLOBAL_MASK	0x0270	/* Mercury only: Global Address Mask */
+#define LBA_DMA_CTL	0x0278	/* firmware sets this */
+
+#define LBA_IBASE	0x0300	/* SBA DMA support */
+#define LBA_IMASK	0x0308
+
+/* FIXME: ignore DMA Hint stuff until we can measure performance */
+#define LBA_HINT_CFG	0x0310
+#define LBA_HINT_BASE	0x0380	/* 14 registers at every 8 bytes. */
+
+#define LBA_BUS_MODE	0x0620
+
+/* ERROR regs are needed for config cycle kluges */
+#define LBA_ERROR_CONFIG 0x0680
+#define     LBA_SMART_MODE 0x20
+#define LBA_ERROR_STATUS 0x0688
+#define LBA_ROPE_CTL     0x06A0
+
+#define LBA_IOSAPIC_BASE	0x800 /* Offset of IRQ logic */
+
+#endif /*_ASM_PARISC_ROPES_H_*/
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
new file mode 100644
index 000000000000..f0dd3b30f6c4
--- /dev/null
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_PARISC_RT_SIGFRAME_H
+#define _ASM_PARISC_RT_SIGFRAME_H
+
+#define SIGRETURN_TRAMP 4
+#define SIGRESTARTBLOCK_TRAMP 5 
+#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
+
+struct rt_sigframe {
+	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
+	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
+		trampoline we left on the stack (we were bad and didn't 
+		change sp so we could run really fast.) */
+	unsigned int tramp[TRAMP_SIZE];
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+#define	SIGFRAME		128
+#define FUNCTIONCALLFRAME	96
+#define PARISC_RT_SIGFRAME_SIZE					\
+	(((sizeof(struct rt_sigframe) + FUNCTIONCALLFRAME) + SIGFRAME) & -SIGFRAME)
+
+#endif
diff --git a/arch/parisc/include/asm/rtc.h b/arch/parisc/include/asm/rtc.h
new file mode 100644
index 000000000000..099d641a42c2
--- /dev/null
+++ b/arch/parisc/include/asm/rtc.h
@@ -0,0 +1,131 @@
+/* 
+ * include/asm-parisc/rtc.h
+ *
+ * Copyright 2002 Randolph CHung <tausq@debian.org>
+ *
+ * Based on: include/asm-ppc/rtc.h and the genrtc driver in the
+ * 2.4 parisc linux tree
+ */
+
+#ifndef __ASM_RTC_H__
+#define __ASM_RTC_H__
+
+#ifdef __KERNEL__
+
+#include <linux/rtc.h>
+
+#include <asm/pdc.h>
+
+#define SECS_PER_HOUR   (60 * 60)
+#define SECS_PER_DAY    (SECS_PER_HOUR * 24)
+
+
+#define RTC_PIE 0x40		/* periodic interrupt enable */
+#define RTC_AIE 0x20		/* alarm interrupt enable */
+#define RTC_UIE 0x10		/* update-finished interrupt enable */
+
+#define RTC_BATT_BAD 0x100	/* battery bad */
+
+/* some dummy definitions */
+#define RTC_SQWE 0x08		/* enable square-wave output */
+#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
+#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
+#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
+
+# define __isleap(year) \
+  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+/* How many days come before each month (0-12).  */
+static const unsigned short int __mon_yday[2][13] =
+{
+	/* Normal years.  */
+	{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+	/* Leap years.  */
+	{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+};
+
+static inline unsigned int get_rtc_time(struct rtc_time *wtime)
+{
+	struct pdc_tod tod_data;
+	long int days, rem, y;
+	const unsigned short int *ip;
+
+	memset(wtime, 0, sizeof(*wtime));
+	if (pdc_tod_read(&tod_data) < 0)
+		return RTC_24H | RTC_BATT_BAD;
+
+	// most of the remainder of this function is:
+//	Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc.
+//	This was originally a part of the GNU C Library.
+//      It is distributed under the GPL, and was swiped from offtime.c
+
+
+	days = tod_data.tod_sec / SECS_PER_DAY;
+	rem = tod_data.tod_sec % SECS_PER_DAY;
+
+	wtime->tm_hour = rem / SECS_PER_HOUR;
+	rem %= SECS_PER_HOUR;
+	wtime->tm_min = rem / 60;
+	wtime->tm_sec = rem % 60;
+
+	y = 1970;
+
+#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+
+	while (days < 0 || days >= (__isleap (y) ? 366 : 365))
+	{
+		/* Guess a corrected year, assuming 365 days per year.  */
+		long int yg = y + days / 365 - (days % 365 < 0);
+
+		/* Adjust DAYS and Y to match the guessed year.  */
+		days -= ((yg - y) * 365
+			 + LEAPS_THRU_END_OF (yg - 1)
+			 - LEAPS_THRU_END_OF (y - 1));
+		y = yg;
+	}
+	wtime->tm_year = y - 1900;
+
+	ip = __mon_yday[__isleap(y)];
+	for (y = 11; days < (long int) ip[y]; --y)
+		continue;
+	days -= ip[y];
+	wtime->tm_mon = y;
+	wtime->tm_mday = days + 1;
+
+	return RTC_24H;
+}
+
+static int set_rtc_time(struct rtc_time *wtime)
+{
+	u_int32_t secs;
+
+	secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday, 
+		      wtime->tm_hour, wtime->tm_min, wtime->tm_sec);
+
+	if(pdc_tod_set(secs, 0) < 0)
+		return -1;
+	else
+		return 0;
+
+}
+
+static inline unsigned int get_rtc_ss(void)
+{
+	struct rtc_time h;
+
+	get_rtc_time(&h);
+	return h.tm_sec;
+}
+
+static inline int get_rtc_pll(struct rtc_pll_info *pll)
+{
+	return -EINVAL;
+}
+static inline int set_rtc_pll(struct rtc_pll_info *pll)
+{
+	return -EINVAL;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_RTC_H__ */
diff --git a/arch/parisc/include/asm/runway.h b/arch/parisc/include/asm/runway.h
new file mode 100644
index 000000000000..5bea02da7e22
--- /dev/null
+++ b/arch/parisc/include/asm/runway.h
@@ -0,0 +1,12 @@
+#ifndef ASM_PARISC_RUNWAY_H
+#define ASM_PARISC_RUNWAY_H
+#ifdef __KERNEL__
+
+/* declared in arch/parisc/kernel/setup.c */
+extern struct proc_dir_entry * proc_runway_root;
+
+#define RUNWAY_STATUS	0x10
+#define RUNWAY_DEBUG	0x40
+
+#endif /* __KERNEL__ */
+#endif /* ASM_PARISC_RUNWAY_H */
diff --git a/arch/parisc/include/asm/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
new file mode 100644
index 000000000000..62269b31ebf4
--- /dev/null
+++ b/arch/parisc/include/asm/scatterlist.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_PARISC_SCATTERLIST_H
+#define _ASM_PARISC_SCATTERLIST_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long sg_magic;
+#endif
+	unsigned long page_link;
+	unsigned int offset;
+
+	unsigned int length;
+
+	/* an IOVA can be 64-bits on some PA-Risc platforms. */
+	dma_addr_t iova;	/* I/O Virtual Address */
+	__u32      iova_length; /* bytes mapped */
+};
+
+#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
+#define sg_dma_address(sg) ((sg)->iova)
+#define sg_dma_len(sg)     ((sg)->iova_length)
+
+#define ISA_DMA_THRESHOLD (~0UL)
+
+#endif /* _ASM_PARISC_SCATTERLIST_H */
diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h
new file mode 100644
index 000000000000..9d13c3507ad6
--- /dev/null
+++ b/arch/parisc/include/asm/sections.h
@@ -0,0 +1,12 @@
+#ifndef _PARISC_SECTIONS_H
+#define _PARISC_SECTIONS_H
+
+/* nothing to see, move along */
+#include <asm-generic/sections.h>
+
+#ifdef CONFIG_64BIT
+#undef dereference_function_descriptor
+void *dereference_function_descriptor(void *);
+#endif
+
+#endif
diff --git a/arch/parisc/include/asm/segment.h b/arch/parisc/include/asm/segment.h
new file mode 100644
index 000000000000..26794ddb6524
--- /dev/null
+++ b/arch/parisc/include/asm/segment.h
@@ -0,0 +1,6 @@
+#ifndef __PARISC_SEGMENT_H
+#define __PARISC_SEGMENT_H
+
+/* Only here because we have some old header files that expect it.. */
+
+#endif
diff --git a/arch/parisc/include/asm/sembuf.h b/arch/parisc/include/asm/sembuf.h
new file mode 100644
index 000000000000..1e59ffd3bd1e
--- /dev/null
+++ b/arch/parisc/include/asm/sembuf.h
@@ -0,0 +1,29 @@
+#ifndef _PARISC_SEMBUF_H
+#define _PARISC_SEMBUF_H
+
+/* 
+ * The semid64_ds structure for parisc architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+#ifndef CONFIG_64BIT
+	unsigned int	__pad1;
+#endif
+	__kernel_time_t	sem_otime;		/* last semop time */
+#ifndef CONFIG_64BIT
+	unsigned int	__pad2;
+#endif
+	__kernel_time_t	sem_ctime;		/* last change time */
+	unsigned int	sem_nsems;		/* no. of semaphores in array */
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+};
+
+#endif /* _PARISC_SEMBUF_H */
diff --git a/arch/parisc/include/asm/serial.h b/arch/parisc/include/asm/serial.h
new file mode 100644
index 000000000000..d7e3cc60dbc3
--- /dev/null
+++ b/arch/parisc/include/asm/serial.h
@@ -0,0 +1,10 @@
+/*
+ * include/asm-parisc/serial.h
+ */
+
+/*
+ * This is used for 16550-compatible UARTs
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+#define SERIAL_PORT_DFNS
diff --git a/arch/parisc/include/asm/setup.h b/arch/parisc/include/asm/setup.h
new file mode 100644
index 000000000000..7da2e5b8747e
--- /dev/null
+++ b/arch/parisc/include/asm/setup.h
@@ -0,0 +1,6 @@
+#ifndef _PARISC_SETUP_H
+#define _PARISC_SETUP_H
+
+#define COMMAND_LINE_SIZE	1024
+
+#endif /* _PARISC_SETUP_H */
diff --git a/arch/parisc/include/asm/shmbuf.h b/arch/parisc/include/asm/shmbuf.h
new file mode 100644
index 000000000000..0a3eada1863b
--- /dev/null
+++ b/arch/parisc/include/asm/shmbuf.h
@@ -0,0 +1,58 @@
+#ifndef _PARISC_SHMBUF_H
+#define _PARISC_SHMBUF_H
+
+/* 
+ * The shmid64_ds structure for parisc architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+#ifndef CONFIG_64BIT
+	unsigned int		__pad1;
+#endif
+	__kernel_time_t		shm_atime;	/* last attach time */
+#ifndef CONFIG_64BIT
+	unsigned int		__pad2;
+#endif
+	__kernel_time_t		shm_dtime;	/* last detach time */
+#ifndef CONFIG_64BIT
+	unsigned int		__pad3;
+#endif
+	__kernel_time_t		shm_ctime;	/* last change time */
+#ifndef CONFIG_64BIT
+	unsigned int		__pad4;
+#endif
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned int		shm_nattch;	/* no. of current attaches */
+	unsigned int		__unused1;
+	unsigned int		__unused2;
+};
+
+#ifdef CONFIG_64BIT
+/* The 'unsigned int' (formerly 'unsigned long') data types below will
+ * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on
+ * a wide kernel, but if some of these values are meant to contain pointers
+ * they may need to be 'long long' instead. -PB XXX FIXME
+ */
+#endif
+struct shminfo64 {
+	unsigned int	shmmax;
+	unsigned int	shmmin;
+	unsigned int	shmmni;
+	unsigned int	shmseg;
+	unsigned int	shmall;
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+	unsigned int	__unused3;
+	unsigned int	__unused4;
+};
+
+#endif /* _PARISC_SHMBUF_H */
diff --git a/arch/parisc/include/asm/shmparam.h b/arch/parisc/include/asm/shmparam.h
new file mode 100644
index 000000000000..628ddc22faa8
--- /dev/null
+++ b/arch/parisc/include/asm/shmparam.h
@@ -0,0 +1,8 @@
+#ifndef _ASMPARISC_SHMPARAM_H
+#define _ASMPARISC_SHMPARAM_H
+
+#define __ARCH_FORCE_SHMLBA 	1
+
+#define SHMLBA 0x00400000   /* attach addr needs to be 4 Mb aligned */
+
+#endif /* _ASMPARISC_SHMPARAM_H */
diff --git a/arch/parisc/include/asm/sigcontext.h b/arch/parisc/include/asm/sigcontext.h
new file mode 100644
index 000000000000..27ef31bb3b6e
--- /dev/null
+++ b/arch/parisc/include/asm/sigcontext.h
@@ -0,0 +1,20 @@
+#ifndef _ASMPARISC_SIGCONTEXT_H
+#define _ASMPARISC_SIGCONTEXT_H
+
+#define PARISC_SC_FLAG_ONSTACK 1<<0
+#define PARISC_SC_FLAG_IN_SYSCALL 1<<1
+
+/* We will add more stuff here as it becomes necessary, until we know
+   it works. */
+struct sigcontext {
+	unsigned long sc_flags;
+
+	unsigned long sc_gr[32]; /* PSW in sc_gr[0] */
+	unsigned long long sc_fr[32]; /* FIXME, do we need other state info? */
+	unsigned long sc_iasq[2];
+	unsigned long sc_iaoq[2];
+	unsigned long sc_sar; /* cr11 */
+};
+
+
+#endif
diff --git a/arch/parisc/include/asm/siginfo.h b/arch/parisc/include/asm/siginfo.h
new file mode 100644
index 000000000000..d4909f55fe35
--- /dev/null
+++ b/arch/parisc/include/asm/siginfo.h
@@ -0,0 +1,14 @@
+#ifndef _PARISC_SIGINFO_H
+#define _PARISC_SIGINFO_H
+
+#include <asm-generic/siginfo.h>
+
+/*
+ * SIGTRAP si_codes
+ */
+#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
+#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
+#undef NSIGTRAP
+#define NSIGTRAP	4
+
+#endif
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
new file mode 100644
index 000000000000..c20356375d1d
--- /dev/null
+++ b/arch/parisc/include/asm/signal.h
@@ -0,0 +1,153 @@
+#ifndef _ASM_PARISC_SIGNAL_H
+#define _ASM_PARISC_SIGNAL_H
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGEMT		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGBUS		10
+#define SIGSEGV		11
+#define SIGSYS		12 /* Linux doesn't use this */
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGUSR1		16
+#define SIGUSR2		17
+#define SIGCHLD		18
+#define SIGPWR		19
+#define SIGVTALRM	20
+#define SIGPROF		21
+#define SIGIO		22
+#define SIGPOLL		SIGIO
+#define SIGWINCH	23
+#define SIGSTOP		24
+#define SIGTSTP		25
+#define SIGCONT		26
+#define SIGTTIN		27
+#define SIGTTOU		28
+#define SIGURG		29
+#define SIGLOST		30 /* Linux doesn't use this either */
+#define	SIGUNUSED	31
+#define SIGRESERVE	SIGUNUSED
+
+#define SIGXCPU		33
+#define SIGXFSZ		34
+#define SIGSTKFLT	36
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	37
+#define SIGRTMAX	_NSIG /* it's 44 under HP/UX */
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_ONSTACK	0x00000001
+#define SA_RESETHAND	0x00000004
+#define SA_NOCLDSTOP	0x00000008
+#define SA_SIGINFO	0x00000010
+#define SA_NODEFER	0x00000020
+#define SA_RESTART	0x00000040
+#define SA_NOCLDWAIT	0x00000080
+#define _SA_SIGGFAULT	0x00000100 /* HPUX */
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000 /* obsolete -- ignored */
+
+/* 
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#ifdef __KERNEL__
+
+#define _NSIG		64
+/* bits-per-word, where word apparently means 'long' not 'int' */
+#define _NSIG_BPW	BITS_PER_LONG
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+#endif /* __KERNEL__ */
+
+#define SIG_BLOCK          0	/* for blocking signals */
+#define SIG_UNBLOCK        1	/* for unblocking signals */
+#define SIG_SETMASK        2	/* for setting the signal mask */
+
+#define SIG_DFL	((__sighandler_t)0)	/* default signal handling */
+#define SIG_IGN	((__sighandler_t)1)	/* ignore signal */
+#define SIG_ERR	((__sighandler_t)-1)	/* error return from signal */
+
+# ifndef __ASSEMBLY__
+
+#  include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+/* Type of a signal handler.  */
+#ifdef CONFIG_64BIT
+/* function pointers on 64-bit parisc are pointers to little structs and the
+ * compiler doesn't support code which changes or tests the address of
+ * the function in the little struct.  This is really ugly -PB
+ */
+typedef char __user *__sighandler_t;
+#else
+typedef void __signalfn_t(int);
+typedef __signalfn_t __user *__sighandler_t;
+#endif
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+typedef unsigned long old_sigset_t;		/* at least 32 bits */
+
+typedef struct {
+	/* next_signal() assumes this is a long - no choice */
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#include <asm/sigcontext.h>
+
+#endif /* __KERNEL__ */
+#endif /* !__ASSEMBLY */
+#endif /* _ASM_PARISC_SIGNAL_H */
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
new file mode 100644
index 000000000000..398cdbaf4e54
--- /dev/null
+++ b/arch/parisc/include/asm/smp.h
@@ -0,0 +1,68 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+
+#if defined(CONFIG_SMP)
+
+/* Page Zero Location PDC will look for the address to branch to when we poke
+** slave CPUs still in "Icache loop".
+*/
+#define PDC_OS_BOOT_RENDEZVOUS     0x10
+#define PDC_OS_BOOT_RENDEZVOUS_HI  0x28
+
+#ifndef ASSEMBLY
+#include <linux/bitops.h>
+#include <linux/threads.h>	/* for NR_CPUS */
+#include <linux/cpumask.h>
+typedef unsigned long address_t;
+
+extern cpumask_t cpu_online_map;
+
+
+/*
+ *	Private routines/data
+ *
+ *	physical and logical are equivalent until we support CPU hotplug.
+ */
+#define cpu_number_map(cpu)	(cpu)
+#define cpu_logical_map(cpu)	(cpu)
+
+extern void smp_send_reschedule(int cpu);
+extern void smp_send_all_nop(void);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi(cpumask_t mask);
+
+#endif /* !ASSEMBLY */
+
+/*
+ *	This magic constant controls our willingness to transfer
+ *      a process across CPUs. Such a transfer incurs cache and tlb
+ *      misses. The current value is inherited from i386. Still needs
+ *      to be tuned for parisc.
+ */
+ 
+#define PROC_CHANGE_PENALTY	15		/* Schedule penalty */
+
+extern unsigned long cpu_present_mask;
+
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
+
+#else /* CONFIG_SMP */
+
+static inline void smp_send_all_nop(void) { return; }
+
+#endif
+
+#define NO_PROC_ID		0xFF		/* No processor magic marker */
+#define ANY_PROC_ID		0xFF		/* Any processor magic marker */
+static inline int __cpu_disable (void) {
+  return 0;
+}
+static inline void __cpu_die (unsigned int cpu) {
+  while(1)
+    ;
+}
+extern int __cpu_up (unsigned int cpu);
+
+#endif /*  __ASM_SMP_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
new file mode 100644
index 000000000000..fba402c95ac2
--- /dev/null
+++ b/arch/parisc/include/asm/socket.h
@@ -0,0 +1,62 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	0xffff
+
+#define SO_DEBUG	0x0001
+#define SO_REUSEADDR	0x0004
+#define SO_KEEPALIVE	0x0008
+#define SO_DONTROUTE	0x0010
+#define SO_BROADCAST	0x0020
+#define SO_LINGER	0x0080
+#define SO_OOBINLINE	0x0100
+/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_SNDBUF	0x1001
+#define SO_RCVBUF	0x1002
+#define SO_SNDBUFFORCE	0x100a
+#define SO_RCVBUFFORCE	0x100b
+#define SO_SNDLOWAT	0x1003
+#define SO_RCVLOWAT	0x1004
+#define SO_SNDTIMEO	0x1005
+#define SO_RCVTIMEO	0x1006
+#define SO_ERROR	0x1007
+#define SO_TYPE		0x1008
+#define SO_PEERNAME	0x2000
+
+#define SO_NO_CHECK	0x400b
+#define SO_PRIORITY	0x400c
+#define SO_BSDCOMPAT	0x400e
+#define SO_PASSCRED	0x4010
+#define SO_PEERCRED	0x4011
+#define SO_TIMESTAMP	0x4012
+#define SCM_TIMESTAMP	SO_TIMESTAMP
+#define SO_TIMESTAMPNS	0x4013
+#define SCM_TIMESTAMPNS	SO_TIMESTAMPNS
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		0x4016
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	0x4017
+#define SO_SECURITY_ENCRYPTION_NETWORK		0x4018
+
+#define SO_BINDTODEVICE	0x4019
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        0x401a
+#define SO_DETACH_FILTER        0x401b
+
+#define SO_ACCEPTCONN		0x401c
+
+#define SO_PEERSEC		0x401d
+#define SO_PASSSEC		0x401e
+
+#define SO_MARK			0x401f
+
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK   0x40000000
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/sockios.h b/arch/parisc/include/asm/sockios.h
new file mode 100644
index 000000000000..dabfbc7483f6
--- /dev/null
+++ b/arch/parisc/include/asm/sockios.h
@@ -0,0 +1,13 @@
+#ifndef __ARCH_PARISC_SOCKIOS__
+#define __ARCH_PARISC_SOCKIOS__
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
new file mode 100644
index 000000000000..f3d2090a18dc
--- /dev/null
+++ b/arch/parisc/include/asm/spinlock.h
@@ -0,0 +1,194 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/spinlock_types.h>
+
+static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+{
+	volatile unsigned int *a = __ldcw_align(x);
+	return *a == 0;
+}
+
+#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
+#define __raw_spin_unlock_wait(x) \
+		do { cpu_relax(); } while (__raw_spin_is_locked(x))
+
+static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
+					 unsigned long flags)
+{
+	volatile unsigned int *a;
+
+	mb();
+	a = __ldcw_align(x);
+	while (__ldcw(a) == 0)
+		while (*a == 0)
+			if (flags & PSW_SM_I) {
+				local_irq_enable();
+				cpu_relax();
+				local_irq_disable();
+			} else
+				cpu_relax();
+	mb();
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *x)
+{
+	volatile unsigned int *a;
+	mb();
+	a = __ldcw_align(x);
+	*a = 1;
+	mb();
+}
+
+static inline int __raw_spin_trylock(raw_spinlock_t *x)
+{
+	volatile unsigned int *a;
+	int ret;
+
+	mb();
+	a = __ldcw_align(x);
+        ret = __ldcw(a) != 0;
+	mb();
+
+	return ret;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Linux rwlocks are unfair to writers; they can be starved for an indefinite
+ * time by readers.  With care, they can also be taken in interrupt context.
+ *
+ * In the PA-RISC implementation, we have a spinlock and a counter.
+ * Readers use the lock to serialise their access to the counter (which
+ * records how many readers currently hold the lock).
+ * Writers hold the spinlock, preventing any readers or other writers from
+ * grabbing the rwlock.
+ */
+
+/* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+static  __inline__ void __raw_read_lock(raw_rwlock_t *rw)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	__raw_spin_lock_flags(&rw->lock, flags);
+	rw->counter++;
+	__raw_spin_unlock(&rw->lock);
+	local_irq_restore(flags);
+}
+
+/* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+static  __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	__raw_spin_lock_flags(&rw->lock, flags);
+	rw->counter--;
+	__raw_spin_unlock(&rw->lock);
+	local_irq_restore(flags);
+}
+
+/* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
+{
+	unsigned long flags;
+ retry:
+	local_irq_save(flags);
+	if (__raw_spin_trylock(&rw->lock)) {
+		rw->counter++;
+		__raw_spin_unlock(&rw->lock);
+		local_irq_restore(flags);
+		return 1;
+	}
+
+	local_irq_restore(flags);
+	/* If write-locked, we fail to acquire the lock */
+	if (rw->counter < 0)
+		return 0;
+
+	/* Wait until we have a realistic chance at the lock */
+	while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0)
+		cpu_relax();
+
+	goto retry;
+}
+
+/* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to read_trylock() this lock */
+static __inline__ void __raw_write_lock(raw_rwlock_t *rw)
+{
+	unsigned long flags;
+retry:
+	local_irq_save(flags);
+	__raw_spin_lock_flags(&rw->lock, flags);
+
+	if (rw->counter != 0) {
+		__raw_spin_unlock(&rw->lock);
+		local_irq_restore(flags);
+
+		while (rw->counter != 0)
+			cpu_relax();
+
+		goto retry;
+	}
+
+	rw->counter = -1; /* mark as write-locked */
+	mb();
+	local_irq_restore(flags);
+}
+
+static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
+{
+	rw->counter = 0;
+	__raw_spin_unlock(&rw->lock);
+}
+
+/* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to read_trylock() this lock */
+static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
+{
+	unsigned long flags;
+	int result = 0;
+
+	local_irq_save(flags);
+	if (__raw_spin_trylock(&rw->lock)) {
+		if (rw->counter == 0) {
+			rw->counter = -1;
+			result = 1;
+		} else {
+			/* Read-locked.  Oh well. */
+			__raw_spin_unlock(&rw->lock);
+		}
+	}
+	local_irq_restore(flags);
+
+	return result;
+}
+
+/*
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw)
+{
+	return rw->counter >= 0;
+}
+
+/*
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw)
+{
+	return !rw->counter;
+}
+
+#define _raw_spin_relax(lock)	cpu_relax()
+#define _raw_read_relax(lock)	cpu_relax()
+#define _raw_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
new file mode 100644
index 000000000000..3f72f47cf4b2
--- /dev/null
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+typedef struct {
+#ifdef CONFIG_PA20
+	volatile unsigned int slock;
+# define __RAW_SPIN_LOCK_UNLOCKED { 1 }
+#else
+	volatile unsigned int lock[4];
+# define __RAW_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
+#endif
+} raw_spinlock_t;
+
+typedef struct {
+	raw_spinlock_t lock;
+	volatile int counter;
+} raw_rwlock_t;
+
+#define __RAW_RW_LOCK_UNLOCKED		{ __RAW_SPIN_LOCK_UNLOCKED, 0 }
+
+#endif
diff --git a/arch/parisc/include/asm/stat.h b/arch/parisc/include/asm/stat.h
new file mode 100644
index 000000000000..9d5fbbc5c31f
--- /dev/null
+++ b/arch/parisc/include/asm/stat.h
@@ -0,0 +1,100 @@
+#ifndef _PARISC_STAT_H
+#define _PARISC_STAT_H
+
+#include <linux/types.h>
+
+struct stat {
+	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
+	ino_t		st_ino;		/* 32 bits */
+	mode_t		st_mode;	/* 16 bits */
+	nlink_t		st_nlink;	/* 16 bits */
+	unsigned short	st_reserved1;	/* old st_uid */
+	unsigned short	st_reserved2;	/* old st_gid */
+	unsigned int	st_rdev;
+	off_t		st_size;
+	time_t		st_atime;
+	unsigned int	st_atime_nsec;
+	time_t		st_mtime;
+	unsigned int	st_mtime_nsec;
+	time_t		st_ctime;
+	unsigned int	st_ctime_nsec;
+	int		st_blksize;
+	int		st_blocks;
+	unsigned int	__unused1;	/* ACL stuff */
+	unsigned int	__unused2;	/* network */
+	ino_t		__unused3;	/* network */
+	unsigned int	__unused4;	/* cnodes */
+	unsigned short	__unused5;	/* netsite */
+	short		st_fstype;
+	unsigned int	st_realdev;
+	unsigned short	st_basemode;
+	unsigned short	st_spareshort;
+	uid_t		st_uid;
+	gid_t		st_gid;
+	unsigned int	st_spare4[3];
+};
+
+#define STAT_HAVE_NSEC
+
+typedef __kernel_off64_t	off64_t;
+
+struct hpux_stat64 {
+	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
+	ino_t           st_ino;         /* 32 bits */
+	mode_t		st_mode;	/* 16 bits */
+	nlink_t		st_nlink;	/* 16 bits */
+	unsigned short	st_reserved1;	/* old st_uid */
+	unsigned short	st_reserved2;	/* old st_gid */
+	unsigned int	st_rdev;
+	off64_t		st_size;
+	time_t		st_atime;
+	unsigned int	st_spare1;
+	time_t		st_mtime;
+	unsigned int	st_spare2;
+	time_t		st_ctime;
+	unsigned int	st_spare3;
+	int		st_blksize;
+	__u64		st_blocks;
+	unsigned int	__unused1;	/* ACL stuff */
+	unsigned int	__unused2;	/* network */
+	ino_t           __unused3;      /* network */
+	unsigned int	__unused4;	/* cnodes */
+	unsigned short	__unused5;	/* netsite */
+	short		st_fstype;
+	unsigned int	st_realdev;
+	unsigned short	st_basemode;
+	unsigned short	st_spareshort;
+	uid_t		st_uid;
+	gid_t		st_gid;
+	unsigned int	st_spare4[3];
+};
+
+/* This is the struct that 32-bit userspace applications are expecting.
+ * How 64-bit apps are going to be compiled, I have no idea.  But at least
+ * this way, we don't have a wrapper in the kernel.
+ */
+struct stat64 {
+	unsigned long long	st_dev;
+	unsigned int		__pad1;
+
+	unsigned int		__st_ino;	/* Not actually filled in */
+	unsigned int		st_mode;
+	unsigned int		st_nlink;
+	unsigned int		st_uid;
+	unsigned int		st_gid;
+	unsigned long long	st_rdev;
+	unsigned int		__pad2;
+	signed long long	st_size;
+	signed int		st_blksize;
+
+	signed long long	st_blocks;
+	signed int		st_atime;
+	unsigned int		st_atime_nsec;
+	signed int		st_mtime;
+	unsigned int		st_mtime_nsec;
+	signed int		st_ctime;
+	unsigned int		st_ctime_nsec;
+	unsigned long long	st_ino;
+};
+
+#endif
diff --git a/arch/parisc/include/asm/statfs.h b/arch/parisc/include/asm/statfs.h
new file mode 100644
index 000000000000..1d2b8130b23d
--- /dev/null
+++ b/arch/parisc/include/asm/statfs.h
@@ -0,0 +1,58 @@
+#ifndef _PARISC_STATFS_H
+#define _PARISC_STATFS_H
+
+#ifndef __KERNEL_STRICT_NAMES
+
+#include <linux/types.h>
+
+typedef __kernel_fsid_t	fsid_t;
+
+#endif
+
+/*
+ * It appears that PARISC could be 64 _or_ 32 bit.
+ * 64-bit fields must be explicitly 64-bit in statfs64.
+ */
+struct statfs {
+	long f_type;
+	long f_bsize;
+	long f_blocks;
+	long f_bfree;
+	long f_bavail;
+	long f_files;
+	long f_ffree;
+	__kernel_fsid_t f_fsid;
+	long f_namelen;
+	long f_frsize;
+	long f_spare[5];
+};
+
+struct statfs64 {
+	long f_type;
+	long f_bsize;
+	__u64 f_blocks;
+	__u64 f_bfree;
+	__u64 f_bavail;
+	__u64 f_files;
+	__u64 f_ffree;
+	__kernel_fsid_t f_fsid;
+	long f_namelen;
+	long f_frsize;
+	long f_spare[5];
+};
+
+struct compat_statfs64 {
+	__u32 f_type;
+	__u32 f_bsize;
+	__u64 f_blocks;
+	__u64 f_bfree;
+	__u64 f_bavail;
+	__u64 f_files;
+	__u64 f_ffree;
+	__kernel_fsid_t f_fsid;
+	__u32 f_namelen;
+	__u32 f_frsize;
+	__u32 f_spare[5];
+};
+
+#endif
diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h
new file mode 100644
index 000000000000..eda01be65e35
--- /dev/null
+++ b/arch/parisc/include/asm/string.h
@@ -0,0 +1,10 @@
+#ifndef _PA_STRING_H_
+#define _PA_STRING_H_
+
+#define __HAVE_ARCH_MEMSET
+extern void * memset(void *, int, size_t);
+
+#define __HAVE_ARCH_MEMCPY
+void * memcpy(void * dest,const void *src,size_t count);
+
+#endif
diff --git a/arch/parisc/include/asm/superio.h b/arch/parisc/include/asm/superio.h
new file mode 100644
index 000000000000..6598acb4d46d
--- /dev/null
+++ b/arch/parisc/include/asm/superio.h
@@ -0,0 +1,85 @@
+#ifndef _PARISC_SUPERIO_H
+#define _PARISC_SUPERIO_H
+
+#define IC_PIC1    0x20		/* PCI I/O address of master 8259 */
+#define IC_PIC2    0xA0		/* PCI I/O address of slave */
+
+/* Config Space Offsets to configuration and base address registers */
+#define SIO_CR     0x5A		/* Configuration Register */
+#define SIO_ACPIBAR 0x88	/* ACPI BAR */
+#define SIO_FDCBAR 0x90		/* Floppy Disk Controller BAR */
+#define SIO_SP1BAR 0x94		/* Serial 1 BAR */
+#define SIO_SP2BAR 0x98		/* Serial 2 BAR */
+#define SIO_PPBAR  0x9C		/* Parallel BAR */
+
+#define TRIGGER_1  0x67		/* Edge/level trigger register 1 */
+#define TRIGGER_2  0x68		/* Edge/level trigger register 2 */
+
+/* Interrupt Routing Control registers */
+#define CFG_IR_SER    0x69	/* Serial 1 [0:3] and Serial 2 [4:7] */
+#define CFG_IR_PFD    0x6a	/* Parallel [0:3] and Floppy [4:7] */
+#define CFG_IR_IDE    0x6b	/* IDE1     [0:3] and IDE2 [4:7] */
+#define CFG_IR_INTAB  0x6c	/* PCI INTA [0:3] and INT B [4:7] */
+#define CFG_IR_INTCD  0x6d	/* PCI INTC [0:3] and INT D [4:7] */
+#define CFG_IR_PS2    0x6e	/* PS/2 KBINT [0:3] and Mouse [4:7] */
+#define CFG_IR_FXBUS  0x6f	/* FXIRQ[0] [0:3] and FXIRQ[1] [4:7] */
+#define CFG_IR_USB    0x70	/* FXIRQ[2] [0:3] and USB [4:7] */
+#define CFG_IR_ACPI   0x71	/* ACPI SCI [0:3] and reserved [4:7] */
+
+#define CFG_IR_LOW     CFG_IR_SER	/* Lowest interrupt routing reg */
+#define CFG_IR_HIGH    CFG_IR_ACPI	/* Highest interrupt routing reg */
+
+/* 8259 operational control words */
+#define OCW2_EOI   0x20		/* Non-specific EOI */
+#define OCW2_SEOI  0x60		/* Specific EOI */
+#define OCW3_IIR   0x0A		/* Read request register */
+#define OCW3_ISR   0x0B		/* Read service register */
+#define OCW3_POLL  0x0C		/* Poll the PIC for an interrupt vector */
+
+/* Interrupt lines. Only PIC1 is used */
+#define USB_IRQ    1		/* USB */
+#define SP1_IRQ    3		/* Serial port 1 */
+#define SP2_IRQ    4		/* Serial port 2 */
+#define PAR_IRQ    5		/* Parallel port */
+#define FDC_IRQ    6		/* Floppy controller */
+#define IDE_IRQ    7		/* IDE (pri+sec) */
+
+/* ACPI registers */
+#define USB_REG_CR	0x1f	/* USB Regulator Control Register */
+
+#define SUPERIO_NIRQS   8
+
+struct superio_device {
+	u32 fdc_base;
+	u32 sp1_base;
+	u32 sp2_base;
+	u32 pp_base;
+	u32 acpi_base;
+	int suckyio_irq_enabled;
+	struct pci_dev *lio_pdev;       /* pci device for legacy IO (fn 1) */
+	struct pci_dev *usb_pdev;       /* pci device for USB (fn 2) */
+};
+
+/*
+ * Does NS make a 87415 based plug in PCI card? If so, because of this
+ * macro we currently don't support it being plugged into a machine
+ * that contains a SuperIO chip AND has CONFIG_SUPERIO enabled.
+ *
+ * This could be fixed by checking to see if function 1 exists, and
+ * if it is SuperIO Legacy IO; but really now, is this combination
+ * going to EVER happen?
+ */
+
+#define SUPERIO_IDE_FN 0 /* Function number of IDE controller */
+#define SUPERIO_LIO_FN 1 /* Function number of Legacy IO controller */
+#define SUPERIO_USB_FN 2 /* Function number of USB controller */
+
+#define is_superio_device(x) \
+	(((x)->vendor == PCI_VENDOR_ID_NS) && \
+	(  ((x)->device == PCI_DEVICE_ID_NS_87415) \
+	|| ((x)->device == PCI_DEVICE_ID_NS_87560_LIO) \
+	|| ((x)->device == PCI_DEVICE_ID_NS_87560_USB) ) )
+
+extern int superio_fixup_irq(struct pci_dev *pcidev); /* called by iosapic */
+
+#endif /* _PARISC_SUPERIO_H */
diff --git a/arch/parisc/include/asm/system.h b/arch/parisc/include/asm/system.h
new file mode 100644
index 000000000000..ee80c920b464
--- /dev/null
+++ b/arch/parisc/include/asm/system.h
@@ -0,0 +1,182 @@
+#ifndef __PARISC_SYSTEM_H
+#define __PARISC_SYSTEM_H
+
+#include <asm/psw.h>
+
+/* The program status word as bitfields.  */
+struct pa_psw {
+	unsigned int y:1;
+	unsigned int z:1;
+	unsigned int rv:2;
+	unsigned int w:1;
+	unsigned int e:1;
+	unsigned int s:1;
+	unsigned int t:1;
+
+	unsigned int h:1;
+	unsigned int l:1;
+	unsigned int n:1;
+	unsigned int x:1;
+	unsigned int b:1;
+	unsigned int c:1;
+	unsigned int v:1;
+	unsigned int m:1;
+
+	unsigned int cb:8;
+
+	unsigned int o:1;
+	unsigned int g:1;
+	unsigned int f:1;
+	unsigned int r:1;
+	unsigned int q:1;
+	unsigned int p:1;
+	unsigned int d:1;
+	unsigned int i:1;
+};
+
+#ifdef CONFIG_64BIT
+#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW + 4))
+#else
+#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW))
+#endif
+
+struct task_struct;
+
+extern struct task_struct *_switch_to(struct task_struct *, struct task_struct *);
+
+#define switch_to(prev, next, last) do {			\
+	(last) = _switch_to(prev, next);			\
+} while(0)
+
+/* interrupt control */
+#define local_save_flags(x)	__asm__ __volatile__("ssm 0, %0" : "=r" (x) : : "memory")
+#define local_irq_disable()	__asm__ __volatile__("rsm %0,%%r0\n" : : "i" (PSW_I) : "memory" )
+#define local_irq_enable()	__asm__ __volatile__("ssm %0,%%r0\n" : : "i" (PSW_I) : "memory" )
+
+#define local_irq_save(x) \
+	__asm__ __volatile__("rsm %1,%0" : "=r" (x) :"i" (PSW_I) : "memory" )
+#define local_irq_restore(x) \
+	__asm__ __volatile__("mtsm %0" : : "r" (x) : "memory" )
+
+#define irqs_disabled()			\
+({					\
+	unsigned long flags;		\
+	local_save_flags(flags);	\
+	(flags & PSW_I) == 0;		\
+})
+
+#define mfctl(reg)	({		\
+	unsigned long cr;		\
+	__asm__ __volatile__(		\
+		"mfctl " #reg ",%0" :	\
+		 "=r" (cr)		\
+	);				\
+	cr;				\
+})
+
+#define mtctl(gr, cr) \
+	__asm__ __volatile__("mtctl %0,%1" \
+		: /* no outputs */ \
+		: "r" (gr), "i" (cr) : "memory")
+
+/* these are here to de-mystefy the calling code, and to provide hooks */
+/* which I needed for debugging EIEM problems -PB */
+#define get_eiem() mfctl(15)
+static inline void set_eiem(unsigned long val)
+{
+	mtctl(val, 15);
+}
+
+#define mfsp(reg)	({		\
+	unsigned long cr;		\
+	__asm__ __volatile__(		\
+		"mfsp " #reg ",%0" :	\
+		 "=r" (cr)		\
+	);				\
+	cr;				\
+})
+
+#define mtsp(gr, cr) \
+	__asm__ __volatile__("mtsp %0,%1" \
+		: /* no outputs */ \
+		: "r" (gr), "i" (cr) : "memory")
+
+
+/*
+** This is simply the barrier() macro from linux/kernel.h but when serial.c
+** uses tqueue.h uses smp_mb() defined using barrier(), linux/kernel.h
+** hasn't yet been included yet so it fails, thus repeating the macro here.
+**
+** PA-RISC architecture allows for weakly ordered memory accesses although
+** none of the processors use it. There is a strong ordered bit that is
+** set in the O-bit of the page directory entry. Operating systems that
+** can not tolerate out of order accesses should set this bit when mapping
+** pages. The O-bit of the PSW should also be set to 1 (I don't believe any
+** of the processor implemented the PSW O-bit). The PCX-W ERS states that
+** the TLB O-bit is not implemented so the page directory does not need to
+** have the O-bit set when mapping pages (section 3.1). This section also
+** states that the PSW Y, Z, G, and O bits are not implemented.
+** So it looks like nothing needs to be done for parisc-linux (yet).
+** (thanks to chada for the above comment -ggg)
+**
+** The __asm__ op below simple prevents gcc/ld from reordering
+** instructions across the mb() "call".
+*/
+#define mb()		__asm__ __volatile__("":::"memory")	/* barrier() */
+#define rmb()		mb()
+#define wmb()		mb()
+#define smp_mb()	mb()
+#define smp_rmb()	mb()
+#define smp_wmb()	mb()
+#define smp_read_barrier_depends()	do { } while(0)
+#define read_barrier_depends()		do { } while(0)
+
+#define set_mb(var, value)		do { var = value; mb(); } while (0)
+
+#ifndef CONFIG_PA20
+/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
+   and GCC only guarantees 8-byte alignment for stack locals, we can't
+   be assured of 16-byte alignment for atomic lock data even if we
+   specify "__attribute ((aligned(16)))" in the type declaration.  So,
+   we use a struct containing an array of four ints for the atomic lock
+   type and dynamically select the 16-byte aligned int from the array
+   for the semaphore.  */
+
+#define __PA_LDCW_ALIGNMENT	16
+#define __ldcw_align(a) ({					\
+	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
+	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
+		& ~(__PA_LDCW_ALIGNMENT - 1);			\
+	(volatile unsigned int *) __ret;			\
+})
+#define __LDCW	"ldcw"
+
+#else /*CONFIG_PA20*/
+/* From: "Jim Hull" <jim.hull of hp.com>
+   I've attached a summary of the change, but basically, for PA 2.0, as
+   long as the ",CO" (coherent operation) completer is specified, then the
+   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
+   they only require "natural" alignment (4-byte for ldcw, 8-byte for
+   ldcd). */
+
+#define __PA_LDCW_ALIGNMENT	4
+#define __ldcw_align(a) ((volatile unsigned int *)a)
+#define __LDCW	"ldcw,co"
+
+#endif /*!CONFIG_PA20*/
+
+/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.  */
+#define __ldcw(a) ({						\
+	unsigned __ret;						\
+	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
+		: "=r" (__ret) : "r" (a));			\
+	__ret;							\
+})
+
+#ifdef CONFIG_SMP
+# define __lock_aligned __attribute__((__section__(".data.lock_aligned")))
+#endif
+
+#define arch_align_stack(x) (x)
+
+#endif
diff --git a/arch/parisc/include/asm/termbits.h b/arch/parisc/include/asm/termbits.h
new file mode 100644
index 000000000000..d8bbc73b16b7
--- /dev/null
+++ b/arch/parisc/include/asm/termbits.h
@@ -0,0 +1,200 @@
+#ifndef __ARCH_PARISC_TERMBITS_H__
+#define __ARCH_PARISC_TERMBITS_H__
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0040000
+#define IUTF8	0100000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD   0010017
+#define  B0     0000000         /* hang up */
+#define  B50    0000001
+#define  B75    0000002
+#define  B110   0000003
+#define  B134   0000004
+#define  B150   0000005
+#define  B200   0000006
+#define  B300   0000007
+#define  B600   0000010
+#define  B1200  0000011
+#define  B1800  0000012
+#define  B2400  0000013
+#define  B4800  0000014
+#define  B9600  0000015
+#define  B19200 0000016
+#define  B38400 0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE   0000060
+#define   CS5   0000000
+#define   CS6   0000020
+#define   CS7   0000040
+#define   CS8   0000060
+#define CSTOPB  0000100
+#define CREAD   0000200
+#define PARENB  0000400
+#define PARODD  0001000
+#define HUPCL   0002000
+#define CLOCAL  0004000
+#define CBAUDEX 0010000
+#define    BOTHER 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD    002003600000		/* input baud rate */
+#define CMSPAR    010000000000          /* mark or space (stick) parity */
+#define CRTSCTS   020000000000          /* flow control */
+
+#define IBSHIFT	16		/* Shift from CBAUD to CIBAUD */
+
+
+/* c_lflag bits */
+#define ISIG    0000001
+#define ICANON  0000002
+#define XCASE   0000004
+#define ECHO    0000010
+#define ECHOE   0000020
+#define ECHOK   0000040
+#define ECHONL  0000100
+#define NOFLSH  0000200
+#define TOSTOP  0000400
+#define ECHOCTL 0001000
+#define ECHOPRT 0002000
+#define ECHOKE  0004000
+#define FLUSHO  0010000
+#define PENDIN  0040000
+#define IEXTEN  0100000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif
diff --git a/arch/parisc/include/asm/termios.h b/arch/parisc/include/asm/termios.h
new file mode 100644
index 000000000000..a2a57a4548af
--- /dev/null
+++ b/arch/parisc/include/asm/termios.h
@@ -0,0 +1,90 @@
+#ifndef _PARISC_TERMIOS_H
+#define _PARISC_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+#ifdef __KERNEL__
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
+	unsigned short __tmp; \
+	get_user(__tmp,&(termio)->x); \
+	*(unsigned short *) &(termios)->x = __tmp; \
+}
+
+#define user_termio_to_kernel_termios(termios, termio) \
+({ \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
+	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios) \
+({ \
+	put_user((termios)->c_iflag, &(termio)->c_iflag); \
+	put_user((termios)->c_oflag, &(termio)->c_oflag); \
+	put_user((termios)->c_cflag, &(termio)->c_cflag); \
+	put_user((termios)->c_lflag, &(termio)->c_lflag); \
+	put_user((termios)->c_line,  &(termio)->c_line); \
+	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+})
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _PARISC_TERMIOS_H */
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
new file mode 100644
index 000000000000..9f812741c355
--- /dev/null
+++ b/arch/parisc/include/asm/thread_info.h
@@ -0,0 +1,74 @@
+#ifndef _ASM_PARISC_THREAD_INFO_H
+#define _ASM_PARISC_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+
+struct thread_info {
+	struct task_struct *task;	/* main task structure */
+	struct exec_domain *exec_domain;/* execution domain */
+	unsigned long flags;		/* thread_info flags (see TIF_*) */
+	mm_segment_t addr_limit;	/* user-level address space limit */
+	__u32 cpu;			/* current CPU */
+	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
+	struct restart_block restart_block;
+};
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.addr_limit	= KERNEL_DS,		\
+	.preempt_count	= 1,			\
+  	.restart_block	= {			\
+		.fn = do_no_restart_syscall	\
+	}					\
+}
+
+#define init_thread_info        (init_thread_union.thread_info)
+#define init_stack              (init_thread_union.stack)
+
+/* thread information allocation */
+
+#define THREAD_SIZE_ORDER            2
+/* Be sure to hunt all references to this down when you change the size of
+ * the kernel stack */
+#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
+
+/* how to get the thread information struct from C */
+#define current_thread_info()	((struct thread_info *)mfctl(30))
+
+#endif /* !__ASSEMBLY */
+
+#define PREEMPT_ACTIVE_BIT	28
+#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
+
+/*
+ * thread information flags
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_32BIT               4       /* 32 bit binary */
+#define TIF_MEMDIE		5
+#define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+#define _TIF_32BIT		(1 << TIF_32BIT)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+
+#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
+                                 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_PARISC_THREAD_INFO_H */
diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h
new file mode 100644
index 000000000000..3b68d77273d9
--- /dev/null
+++ b/arch/parisc/include/asm/timex.h
@@ -0,0 +1,20 @@
+/*
+ * linux/include/asm-parisc/timex.h
+ *
+ * PARISC architecture timex specifications
+ */
+#ifndef _ASMPARISC_TIMEX_H
+#define _ASMPARISC_TIMEX_H
+
+#include <asm/system.h>
+
+#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles (void)
+{
+	return mfctl(16);
+}
+
+#endif
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
new file mode 100644
index 000000000000..383b1db310ee
--- /dev/null
+++ b/arch/parisc/include/asm/tlb.h
@@ -0,0 +1,27 @@
+#ifndef _PARISC_TLB_H
+#define _PARISC_TLB_H
+
+#define tlb_flush(tlb)			\
+do {	if ((tlb)->fullmm)		\
+		flush_tlb_mm((tlb)->mm);\
+} while (0)
+
+#define tlb_start_vma(tlb, vma) \
+do {	if (!(tlb)->fullmm)	\
+		flush_cache_range(vma, vma->vm_start, vma->vm_end); \
+} while (0)
+
+#define tlb_end_vma(tlb, vma)	\
+do {	if (!(tlb)->fullmm)	\
+		flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
+} while (0)
+
+#define __tlb_remove_tlb_entry(tlb, pte, address) \
+	do { } while (0)
+
+#include <asm-generic/tlb.h>
+
+#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
+
+#endif
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
new file mode 100644
index 000000000000..b72ec66db699
--- /dev/null
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -0,0 +1,80 @@
+#ifndef _PARISC_TLBFLUSH_H
+#define _PARISC_TLBFLUSH_H
+
+/* TLB flushing routines.... */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/mmu_context.h>
+
+
+/* This is for the serialisation of PxTLB broadcasts.  At least on the
+ * N class systems, only one PxTLB inter processor broadcast can be
+ * active at any one time on the Merced bus.  This tlb purge
+ * synchronisation is fairly lightweight and harmless so we activate
+ * it on all SMP systems not just the N class.  We also need to have
+ * preemption disabled on uniprocessor machines, and spin_lock does that
+ * nicely.
+ */
+extern spinlock_t pa_tlb_lock;
+
+#define purge_tlb_start(x) spin_lock(&pa_tlb_lock)
+#define purge_tlb_end(x) spin_unlock(&pa_tlb_lock)
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_all_local(void *);
+
+/*
+ * flush_tlb_mm()
+ *
+ * XXX This code is NOT valid for HP-UX compatibility processes,
+ * (although it will probably work 99% of the time). HP-UX
+ * processes are free to play with the space id's and save them
+ * over long periods of time, etc. so we have to preserve the
+ * space and just flush the entire tlb. We need to check the
+ * personality in order to do that, but the personality is not
+ * currently being set correctly.
+ *
+ * Of course, Linux processes could do the same thing, but
+ * we don't support that (and the compilers, dynamic linker,
+ * etc. do not do that).
+ */
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG_ON(mm == &init_mm); /* Should never happen */
+
+#ifdef CONFIG_SMP
+	flush_tlb_all();
+#else
+	if (mm) {
+		if (mm->context != 0)
+			free_sid(mm->context);
+		mm->context = alloc_sid();
+		if (mm == current->active_mm)
+			load_context(mm->context);
+	}
+#endif
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+	unsigned long addr)
+{
+	/* For one page, it's not worth testing the split_tlb variable */
+
+	mb();
+	mtsp(vma->vm_mm->context,1);
+	purge_tlb_start();
+	pdtlb(addr);
+	pitlb(addr);
+	purge_tlb_end();
+}
+
+void __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
+
+#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
+
+#endif
diff --git a/arch/parisc/include/asm/topology.h b/arch/parisc/include/asm/topology.h
new file mode 100644
index 000000000000..d8133eb0b1e7
--- /dev/null
+++ b/arch/parisc/include/asm/topology.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_PARISC_TOPOLOGY_H
+#define _ASM_PARISC_TOPOLOGY_H
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_PARISC_TOPOLOGY_H */
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
new file mode 100644
index 000000000000..1945f995f2df
--- /dev/null
+++ b/arch/parisc/include/asm/traps.h
@@ -0,0 +1,16 @@
+#ifndef __ASM_TRAPS_H
+#define __ASM_TRAPS_H
+
+#ifdef __KERNEL__
+struct pt_regs;
+
+/* traps.c */
+void parisc_terminate(char *msg, struct pt_regs *regs,
+		int code, unsigned long offset);
+
+/* mm/fault.c */
+void do_page_fault(struct pt_regs *regs, unsigned long code,
+		unsigned long address);
+#endif
+
+#endif
diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h
new file mode 100644
index 000000000000..7f5a39bfb4ce
--- /dev/null
+++ b/arch/parisc/include/asm/types.h
@@ -0,0 +1,36 @@
+#ifndef _PARISC_TYPES_H
+#define _PARISC_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+#ifdef CONFIG_64BIT
+#define BITS_PER_LONG 64
+#define SHIFT_PER_LONG 6
+#else
+#define BITS_PER_LONG 32
+#define SHIFT_PER_LONG 5
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* Dma addresses are 32-bits wide.  */
+
+typedef u32 dma_addr_t;
+typedef u64 dma64_addr_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
new file mode 100644
index 000000000000..4878b9501f24
--- /dev/null
+++ b/arch/parisc/include/asm/uaccess.h
@@ -0,0 +1,244 @@
+#ifndef __PARISC_UACCESS_H
+#define __PARISC_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/cache.h>
+#include <asm-generic/uaccess.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+#define KERNEL_DS	((mm_segment_t){0})
+#define USER_DS 	((mm_segment_t){1})
+
+#define segment_eq(a,b)	((a).seg == (b).seg)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+
+/*
+ * Note that since kernel addresses are in a separate address space on
+ * parisc, we don't need to do anything for access_ok().
+ * We just let the page fault handler do the right thing. This also means
+ * that put_user is the same as __put_user, etc.
+ */
+
+extern int __get_kernel_bad(void);
+extern int __get_user_bad(void);
+extern int __put_kernel_bad(void);
+extern int __put_user_bad(void);
+
+static inline long access_ok(int type, const void __user * addr,
+		unsigned long size)
+{
+	return 1;
+}
+
+#define put_user __put_user
+#define get_user __get_user
+
+#if !defined(CONFIG_64BIT)
+#define LDD_KERNEL(ptr)		__get_kernel_bad();
+#define LDD_USER(ptr)		__get_user_bad();
+#define STD_KERNEL(x, ptr)	__put_kernel_asm64(x,ptr)
+#define STD_USER(x, ptr)	__put_user_asm64(x,ptr)
+#define ASM_WORD_INSN		".word\t"
+#else
+#define LDD_KERNEL(ptr)		__get_kernel_asm("ldd",ptr)
+#define LDD_USER(ptr)		__get_user_asm("ldd",ptr)
+#define STD_KERNEL(x, ptr)	__put_kernel_asm("std",x,ptr)
+#define STD_USER(x, ptr)	__put_user_asm("std",x,ptr)
+#define ASM_WORD_INSN		".dword\t"
+#endif
+
+/*
+ * The exception table contains two values: the first is an address
+ * for an instruction that is allowed to fault, and the second is
+ * the address to the fixup routine. 
+ */
+
+struct exception_table_entry {
+	unsigned long insn;  /* address of insn that is allowed to fault.   */
+	long fixup;          /* fixup routine */
+};
+
+#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
+	".section __ex_table,\"aw\"\n"			   \
+	ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \
+	".previous\n"
+
+/*
+ * The page fault handler stores, in a per-cpu area, the following information
+ * if a fixup routine is available.
+ */
+struct exception_data {
+	unsigned long fault_ip;
+	unsigned long fault_space;
+	unsigned long fault_addr;
+};
+
+#define __get_user(x,ptr)                               \
+({                                                      \
+	register long __gu_err __asm__ ("r8") = 0;      \
+	register long __gu_val __asm__ ("r9") = 0;      \
+							\
+	if (segment_eq(get_fs(),KERNEL_DS)) {           \
+	    switch (sizeof(*(ptr))) {                   \
+	    case 1: __get_kernel_asm("ldb",ptr); break; \
+	    case 2: __get_kernel_asm("ldh",ptr); break; \
+	    case 4: __get_kernel_asm("ldw",ptr); break; \
+	    case 8: LDD_KERNEL(ptr); break;		\
+	    default: __get_kernel_bad(); break;         \
+	    }                                           \
+	}                                               \
+	else {                                          \
+	    switch (sizeof(*(ptr))) {                   \
+	    case 1: __get_user_asm("ldb",ptr); break;   \
+	    case 2: __get_user_asm("ldh",ptr); break;   \
+	    case 4: __get_user_asm("ldw",ptr); break;   \
+	    case 8: LDD_USER(ptr);  break;		\
+	    default: __get_user_bad(); break;           \
+	    }                                           \
+	}                                               \
+							\
+	(x) = (__typeof__(*(ptr))) __gu_val;            \
+	__gu_err;                                       \
+})
+
+#define __get_kernel_asm(ldx,ptr)                       \
+	__asm__("\n1:\t" ldx "\t0(%2),%0\n\t"		\
+		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
+		: "=r"(__gu_val), "=r"(__gu_err)        \
+		: "r"(ptr), "1"(__gu_err)		\
+		: "r1");
+
+#define __get_user_asm(ldx,ptr)                         \
+	__asm__("\n1:\t" ldx "\t0(%%sr3,%2),%0\n\t"	\
+		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_get_user_skip_1)\
+		: "=r"(__gu_val), "=r"(__gu_err)        \
+		: "r"(ptr), "1"(__gu_err)		\
+		: "r1");
+
+#define __put_user(x,ptr)                                       \
+({								\
+	register long __pu_err __asm__ ("r8") = 0;      	\
+        __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);	\
+								\
+	if (segment_eq(get_fs(),KERNEL_DS)) {                   \
+	    switch (sizeof(*(ptr))) {                           \
+	    case 1: __put_kernel_asm("stb",__x,ptr); break;     \
+	    case 2: __put_kernel_asm("sth",__x,ptr); break;     \
+	    case 4: __put_kernel_asm("stw",__x,ptr); break;     \
+	    case 8: STD_KERNEL(__x,ptr); break;			\
+	    default: __put_kernel_bad(); break;			\
+	    }                                                   \
+	}                                                       \
+	else {                                                  \
+	    switch (sizeof(*(ptr))) {                           \
+	    case 1: __put_user_asm("stb",__x,ptr); break;       \
+	    case 2: __put_user_asm("sth",__x,ptr); break;       \
+	    case 4: __put_user_asm("stw",__x,ptr); break;       \
+	    case 8: STD_USER(__x,ptr); break;			\
+	    default: __put_user_bad(); break;			\
+	    }                                                   \
+	}                                                       \
+								\
+	__pu_err;						\
+})
+
+/*
+ * The "__put_user/kernel_asm()" macros tell gcc they read from memory
+ * instead of writing. This is because they do not write to any memory
+ * gcc knows about, so there are no aliasing issues. These macros must
+ * also be aware that "fixup_put_user_skip_[12]" are executed in the
+ * context of the fault, and any registers used there must be listed
+ * as clobbers. In this case only "r1" is used by the current routines.
+ * r8/r9 are already listed as err/val.
+ */
+
+#define __put_kernel_asm(stx,x,ptr)                         \
+	__asm__ __volatile__ (                              \
+		"\n1:\t" stx "\t%2,0(%1)\n\t"		    \
+		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\
+		: "=r"(__pu_err)                            \
+		: "r"(ptr), "r"(x), "0"(__pu_err)	    \
+	    	: "r1")
+
+#define __put_user_asm(stx,x,ptr)                           \
+	__asm__ __volatile__ (                              \
+		"\n1:\t" stx "\t%2,0(%%sr3,%1)\n\t"	    \
+		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\
+		: "=r"(__pu_err)                            \
+		: "r"(ptr), "r"(x), "0"(__pu_err)	    \
+		: "r1")
+
+
+#if !defined(CONFIG_64BIT)
+
+#define __put_kernel_asm64(__val,ptr) do {		    \
+	u64 __val64 = (u64)(__val);			    \
+	u32 hi = (__val64) >> 32;			    \
+	u32 lo = (__val64) & 0xffffffff;		    \
+	__asm__ __volatile__ (				    \
+		"\n1:\tstw %2,0(%1)"			    \
+		"\n2:\tstw %3,4(%1)\n\t"		    \
+		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\
+		ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\
+		: "=r"(__pu_err)                            \
+		: "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \
+		: "r1");				    \
+} while (0)
+
+#define __put_user_asm64(__val,ptr) do {	    	    \
+	u64 __val64 = (u64)(__val);			    \
+	u32 hi = (__val64) >> 32;			    \
+	u32 lo = (__val64) & 0xffffffff;		    \
+	__asm__ __volatile__ (				    \
+		"\n1:\tstw %2,0(%%sr3,%1)"		    \
+		"\n2:\tstw %3,4(%%sr3,%1)\n\t"		    \
+		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\
+		ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\
+		: "=r"(__pu_err)                            \
+		: "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \
+		: "r1");				    \
+} while (0)
+
+#endif /* !defined(CONFIG_64BIT) */
+
+
+/*
+ * Complex access routines -- external declarations
+ */
+
+extern unsigned long lcopy_to_user(void __user *, const void *, unsigned long);
+extern unsigned long lcopy_from_user(void *, const void __user *, unsigned long);
+extern unsigned long lcopy_in_user(void __user *, const void __user *, unsigned long);
+extern long lstrncpy_from_user(char *, const char __user *, long);
+extern unsigned lclear_user(void __user *,unsigned long);
+extern long lstrnlen_user(const char __user *,long);
+
+/*
+ * Complex access routines -- macros
+ */
+
+#define strncpy_from_user lstrncpy_from_user
+#define strnlen_user lstrnlen_user
+#define strlen_user(str) lstrnlen_user(str, 0x7fffffffL)
+#define clear_user lclear_user
+#define __clear_user lclear_user
+
+unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len);
+#define __copy_to_user copy_to_user
+unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len);
+#define __copy_from_user copy_from_user
+unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len);
+#define __copy_in_user copy_in_user
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+#endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/include/asm/ucontext.h b/arch/parisc/include/asm/ucontext.h
new file mode 100644
index 000000000000..6c8883e4b0bd
--- /dev/null
+++ b/arch/parisc/include/asm/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_PARISC_UCONTEXT_H
+#define _ASM_PARISC_UCONTEXT_H
+
+struct ucontext {
+	unsigned int	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
+#endif /* !_ASM_PARISC_UCONTEXT_H */
diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h
new file mode 100644
index 000000000000..dfc5d3321a54
--- /dev/null
+++ b/arch/parisc/include/asm/unaligned.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_PARISC_UNALIGNED_H
+#define _ASM_PARISC_UNALIGNED_H
+
+#include <linux/unaligned/be_struct.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <linux/unaligned/generic.h>
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
+#ifdef __KERNEL__
+struct pt_regs;
+void handle_unaligned(struct pt_regs *regs);
+int check_unaligned(struct pt_regs *regs);
+#endif
+
+#endif /* _ASM_PARISC_UNALIGNED_H */
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
new file mode 100644
index 000000000000..a7d857f0e4f4
--- /dev/null
+++ b/arch/parisc/include/asm/unistd.h
@@ -0,0 +1,991 @@
+#ifndef _ASM_PARISC_UNISTD_H_
+#define _ASM_PARISC_UNISTD_H_
+
+/*
+ * This file contains the system call numbers.
+ */
+
+/*
+ *   HP-UX system calls get their native numbers for binary compatibility.
+ */
+
+#define __NR_HPUX_exit                    1
+#define __NR_HPUX_fork                    2
+#define __NR_HPUX_read                    3
+#define __NR_HPUX_write                   4
+#define __NR_HPUX_open                    5
+#define __NR_HPUX_close                   6
+#define __NR_HPUX_wait                    7
+#define __NR_HPUX_creat                   8
+#define __NR_HPUX_link                    9
+#define __NR_HPUX_unlink                 10
+#define __NR_HPUX_execv                  11
+#define __NR_HPUX_chdir                  12
+#define __NR_HPUX_time                   13
+#define __NR_HPUX_mknod                  14
+#define __NR_HPUX_chmod                  15
+#define __NR_HPUX_chown                  16
+#define __NR_HPUX_break                  17
+#define __NR_HPUX_lchmod                 18
+#define __NR_HPUX_lseek                  19
+#define __NR_HPUX_getpid                 20
+#define __NR_HPUX_mount                  21
+#define __NR_HPUX_umount                 22
+#define __NR_HPUX_setuid                 23
+#define __NR_HPUX_getuid                 24
+#define __NR_HPUX_stime                  25
+#define __NR_HPUX_ptrace                 26
+#define __NR_HPUX_alarm                  27
+#define __NR_HPUX_oldfstat               28
+#define __NR_HPUX_pause                  29
+#define __NR_HPUX_utime                  30
+#define __NR_HPUX_stty                   31
+#define __NR_HPUX_gtty                   32
+#define __NR_HPUX_access                 33
+#define __NR_HPUX_nice                   34
+#define __NR_HPUX_ftime                  35
+#define __NR_HPUX_sync                   36
+#define __NR_HPUX_kill                   37
+#define __NR_HPUX_stat                   38
+#define __NR_HPUX_setpgrp3               39
+#define __NR_HPUX_lstat                  40
+#define __NR_HPUX_dup                    41
+#define __NR_HPUX_pipe                   42
+#define __NR_HPUX_times                  43
+#define __NR_HPUX_profil                 44
+#define __NR_HPUX_ki_call                45
+#define __NR_HPUX_setgid                 46
+#define __NR_HPUX_getgid                 47
+#define __NR_HPUX_sigsys                 48
+#define __NR_HPUX_reserved1              49
+#define __NR_HPUX_reserved2              50
+#define __NR_HPUX_acct                   51
+#define __NR_HPUX_set_userthreadid       52
+#define __NR_HPUX_oldlock                53
+#define __NR_HPUX_ioctl                  54
+#define __NR_HPUX_reboot                 55
+#define __NR_HPUX_symlink                56
+#define __NR_HPUX_utssys                 57
+#define __NR_HPUX_readlink               58
+#define __NR_HPUX_execve                 59
+#define __NR_HPUX_umask                  60
+#define __NR_HPUX_chroot                 61
+#define __NR_HPUX_fcntl                  62
+#define __NR_HPUX_ulimit                 63
+#define __NR_HPUX_getpagesize            64
+#define __NR_HPUX_mremap                 65
+#define __NR_HPUX_vfork                  66
+#define __NR_HPUX_vread                  67
+#define __NR_HPUX_vwrite                 68
+#define __NR_HPUX_sbrk                   69
+#define __NR_HPUX_sstk                   70
+#define __NR_HPUX_mmap                   71
+#define __NR_HPUX_vadvise                72
+#define __NR_HPUX_munmap                 73
+#define __NR_HPUX_mprotect               74
+#define __NR_HPUX_madvise                75
+#define __NR_HPUX_vhangup                76
+#define __NR_HPUX_swapoff                77
+#define __NR_HPUX_mincore                78
+#define __NR_HPUX_getgroups              79
+#define __NR_HPUX_setgroups              80
+#define __NR_HPUX_getpgrp2               81
+#define __NR_HPUX_setpgrp2               82
+#define __NR_HPUX_setitimer              83
+#define __NR_HPUX_wait3                  84
+#define __NR_HPUX_swapon                 85
+#define __NR_HPUX_getitimer              86
+#define __NR_HPUX_gethostname42          87
+#define __NR_HPUX_sethostname42          88
+#define __NR_HPUX_getdtablesize          89
+#define __NR_HPUX_dup2                   90
+#define __NR_HPUX_getdopt                91
+#define __NR_HPUX_fstat                  92
+#define __NR_HPUX_select                 93
+#define __NR_HPUX_setdopt                94
+#define __NR_HPUX_fsync                  95
+#define __NR_HPUX_setpriority            96
+#define __NR_HPUX_socket_old             97
+#define __NR_HPUX_connect_old            98
+#define __NR_HPUX_accept_old             99
+#define __NR_HPUX_getpriority           100
+#define __NR_HPUX_send_old              101
+#define __NR_HPUX_recv_old              102
+#define __NR_HPUX_socketaddr_old        103
+#define __NR_HPUX_bind_old              104
+#define __NR_HPUX_setsockopt_old        105
+#define __NR_HPUX_listen_old            106
+#define __NR_HPUX_vtimes_old            107
+#define __NR_HPUX_sigvector             108
+#define __NR_HPUX_sigblock              109
+#define __NR_HPUX_siggetmask            110
+#define __NR_HPUX_sigpause              111
+#define __NR_HPUX_sigstack              112
+#define __NR_HPUX_recvmsg_old           113
+#define __NR_HPUX_sendmsg_old           114
+#define __NR_HPUX_vtrace_old            115
+#define __NR_HPUX_gettimeofday          116
+#define __NR_HPUX_getrusage             117
+#define __NR_HPUX_getsockopt_old        118
+#define __NR_HPUX_resuba_old            119
+#define __NR_HPUX_readv                 120
+#define __NR_HPUX_writev                121
+#define __NR_HPUX_settimeofday          122
+#define __NR_HPUX_fchown                123
+#define __NR_HPUX_fchmod                124
+#define __NR_HPUX_recvfrom_old          125
+#define __NR_HPUX_setresuid             126
+#define __NR_HPUX_setresgid             127
+#define __NR_HPUX_rename                128
+#define __NR_HPUX_truncate              129
+#define __NR_HPUX_ftruncate             130
+#define __NR_HPUX_flock_old             131
+#define __NR_HPUX_sysconf               132
+#define __NR_HPUX_sendto_old            133
+#define __NR_HPUX_shutdown_old          134
+#define __NR_HPUX_socketpair_old        135
+#define __NR_HPUX_mkdir                 136
+#define __NR_HPUX_rmdir                 137
+#define __NR_HPUX_utimes_old            138
+#define __NR_HPUX_sigcleanup_old        139
+#define __NR_HPUX_setcore               140
+#define __NR_HPUX_getpeername_old       141
+#define __NR_HPUX_gethostid             142
+#define __NR_HPUX_sethostid             143
+#define __NR_HPUX_getrlimit             144
+#define __NR_HPUX_setrlimit             145
+#define __NR_HPUX_killpg_old            146
+#define __NR_HPUX_cachectl              147
+#define __NR_HPUX_quotactl              148
+#define __NR_HPUX_get_sysinfo           149
+#define __NR_HPUX_getsockname_old       150
+#define __NR_HPUX_privgrp               151
+#define __NR_HPUX_rtprio                152
+#define __NR_HPUX_plock                 153
+#define __NR_HPUX_reserved3             154
+#define __NR_HPUX_lockf                 155
+#define __NR_HPUX_semget                156
+#define __NR_HPUX_osemctl               157
+#define __NR_HPUX_semop                 158
+#define __NR_HPUX_msgget                159
+#define __NR_HPUX_omsgctl               160
+#define __NR_HPUX_msgsnd                161
+#define __NR_HPUX_msgrecv               162
+#define __NR_HPUX_shmget                163
+#define __NR_HPUX_oshmctl               164
+#define __NR_HPUX_shmat                 165
+#define __NR_HPUX_shmdt                 166
+#define __NR_HPUX_m68020_advise         167
+/* [168,189] are for Discless/DUX */
+#define __NR_HPUX_csp                   168
+#define __NR_HPUX_cluster               169
+#define __NR_HPUX_mkrnod                170
+#define __NR_HPUX_test                  171
+#define __NR_HPUX_unsp_open             172
+#define __NR_HPUX_reserved4             173
+#define __NR_HPUX_getcontext_old        174
+#define __NR_HPUX_osetcontext           175
+#define __NR_HPUX_bigio                 176
+#define __NR_HPUX_pipenode              177
+#define __NR_HPUX_lsync                 178
+#define __NR_HPUX_getmachineid          179
+#define __NR_HPUX_cnodeid               180
+#define __NR_HPUX_cnodes                181
+#define __NR_HPUX_swapclients           182
+#define __NR_HPUX_rmt_process           183
+#define __NR_HPUX_dskless_stats         184
+#define __NR_HPUX_sigprocmask           185
+#define __NR_HPUX_sigpending            186
+#define __NR_HPUX_sigsuspend            187
+#define __NR_HPUX_sigaction             188
+#define __NR_HPUX_reserved5             189
+#define __NR_HPUX_nfssvc                190
+#define __NR_HPUX_getfh                 191
+#define __NR_HPUX_getdomainname         192
+#define __NR_HPUX_setdomainname         193
+#define __NR_HPUX_async_daemon          194
+#define __NR_HPUX_getdirentries         195
+#define __NR_HPUX_statfs                196
+#define __NR_HPUX_fstatfs               197
+#define __NR_HPUX_vfsmount              198
+#define __NR_HPUX_reserved6             199
+#define __NR_HPUX_waitpid               200
+/* 201 - 223 missing */
+#define __NR_HPUX_sigsetreturn          224
+#define __NR_HPUX_sigsetstatemask       225
+/* 226 missing */
+#define __NR_HPUX_cs                    227
+#define __NR_HPUX_cds                   228
+#define __NR_HPUX_set_no_trunc          229
+#define __NR_HPUX_pathconf              230
+#define __NR_HPUX_fpathconf             231
+/* 232, 233 missing */
+#define __NR_HPUX_nfs_fcntl             234
+#define __NR_HPUX_ogetacl               235
+#define __NR_HPUX_ofgetacl              236
+#define __NR_HPUX_osetacl               237
+#define __NR_HPUX_ofsetacl              238
+#define __NR_HPUX_pstat                 239
+#define __NR_HPUX_getaudid              240
+#define __NR_HPUX_setaudid              241
+#define __NR_HPUX_getaudproc            242
+#define __NR_HPUX_setaudproc            243
+#define __NR_HPUX_getevent              244
+#define __NR_HPUX_setevent              245
+#define __NR_HPUX_audwrite              246
+#define __NR_HPUX_audswitch             247
+#define __NR_HPUX_audctl                248
+#define __NR_HPUX_ogetaccess            249
+#define __NR_HPUX_fsctl                 250
+/* 251 - 258 missing */
+#define __NR_HPUX_swapfs                259
+#define __NR_HPUX_fss                   260
+/* 261 - 266 missing */
+#define __NR_HPUX_tsync                 267
+#define __NR_HPUX_getnumfds             268
+#define __NR_HPUX_poll                  269
+#define __NR_HPUX_getmsg                270
+#define __NR_HPUX_putmsg                271
+#define __NR_HPUX_fchdir                272
+#define __NR_HPUX_getmount_cnt          273
+#define __NR_HPUX_getmount_entry        274
+#define __NR_HPUX_accept                275
+#define __NR_HPUX_bind                  276
+#define __NR_HPUX_connect               277
+#define __NR_HPUX_getpeername           278
+#define __NR_HPUX_getsockname           279
+#define __NR_HPUX_getsockopt            280
+#define __NR_HPUX_listen                281
+#define __NR_HPUX_recv                  282
+#define __NR_HPUX_recvfrom              283
+#define __NR_HPUX_recvmsg               284
+#define __NR_HPUX_send                  285
+#define __NR_HPUX_sendmsg               286
+#define __NR_HPUX_sendto                287
+#define __NR_HPUX_setsockopt            288
+#define __NR_HPUX_shutdown              289
+#define __NR_HPUX_socket                290
+#define __NR_HPUX_socketpair            291
+#define __NR_HPUX_proc_open             292
+#define __NR_HPUX_proc_close            293
+#define __NR_HPUX_proc_send             294
+#define __NR_HPUX_proc_recv             295
+#define __NR_HPUX_proc_sendrecv         296
+#define __NR_HPUX_proc_syscall          297
+/* 298 - 311 missing */
+#define __NR_HPUX_semctl                312
+#define __NR_HPUX_msgctl                313
+#define __NR_HPUX_shmctl                314
+#define __NR_HPUX_mpctl                 315
+#define __NR_HPUX_exportfs              316
+#define __NR_HPUX_getpmsg               317
+#define __NR_HPUX_putpmsg               318
+/* 319 missing */
+#define __NR_HPUX_msync                 320
+#define __NR_HPUX_msleep                321
+#define __NR_HPUX_mwakeup               322
+#define __NR_HPUX_msem_init             323
+#define __NR_HPUX_msem_remove           324
+#define __NR_HPUX_adjtime               325
+#define __NR_HPUX_kload                 326
+#define __NR_HPUX_fattach               327
+#define __NR_HPUX_fdetach               328
+#define __NR_HPUX_serialize             329
+#define __NR_HPUX_statvfs               330
+#define __NR_HPUX_fstatvfs              331
+#define __NR_HPUX_lchown                332
+#define __NR_HPUX_getsid                333
+#define __NR_HPUX_sysfs                 334
+/* 335, 336 missing */
+#define __NR_HPUX_sched_setparam        337
+#define __NR_HPUX_sched_getparam        338
+#define __NR_HPUX_sched_setscheduler    339
+#define __NR_HPUX_sched_getscheduler    340
+#define __NR_HPUX_sched_yield           341
+#define __NR_HPUX_sched_get_priority_max 342
+#define __NR_HPUX_sched_get_priority_min 343
+#define __NR_HPUX_sched_rr_get_interval 344
+#define __NR_HPUX_clock_settime         345
+#define __NR_HPUX_clock_gettime         346
+#define __NR_HPUX_clock_getres          347
+#define __NR_HPUX_timer_create          348
+#define __NR_HPUX_timer_delete          349
+#define __NR_HPUX_timer_settime         350
+#define __NR_HPUX_timer_gettime         351
+#define __NR_HPUX_timer_getoverrun      352
+#define __NR_HPUX_nanosleep             353
+#define __NR_HPUX_toolbox               354
+/* 355 missing */
+#define __NR_HPUX_getdents              356
+#define __NR_HPUX_getcontext            357
+#define __NR_HPUX_sysinfo               358
+#define __NR_HPUX_fcntl64               359
+#define __NR_HPUX_ftruncate64           360
+#define __NR_HPUX_fstat64               361
+#define __NR_HPUX_getdirentries64       362
+#define __NR_HPUX_getrlimit64           363
+#define __NR_HPUX_lockf64               364
+#define __NR_HPUX_lseek64               365
+#define __NR_HPUX_lstat64               366
+#define __NR_HPUX_mmap64                367
+#define __NR_HPUX_setrlimit64           368
+#define __NR_HPUX_stat64                369
+#define __NR_HPUX_truncate64            370
+#define __NR_HPUX_ulimit64              371
+#define __NR_HPUX_pread                 372
+#define __NR_HPUX_preadv                373
+#define __NR_HPUX_pwrite                374
+#define __NR_HPUX_pwritev               375
+#define __NR_HPUX_pread64               376
+#define __NR_HPUX_preadv64              377
+#define __NR_HPUX_pwrite64              378
+#define __NR_HPUX_pwritev64             379
+#define __NR_HPUX_setcontext            380
+#define __NR_HPUX_sigaltstack           381
+#define __NR_HPUX_waitid                382
+#define __NR_HPUX_setpgrp               383
+#define __NR_HPUX_recvmsg2              384
+#define __NR_HPUX_sendmsg2              385
+#define __NR_HPUX_socket2               386
+#define __NR_HPUX_socketpair2           387
+#define __NR_HPUX_setregid              388
+#define __NR_HPUX_lwp_create            389
+#define __NR_HPUX_lwp_terminate         390
+#define __NR_HPUX_lwp_wait              391
+#define __NR_HPUX_lwp_suspend           392
+#define __NR_HPUX_lwp_resume            393
+/* 394 missing */
+#define __NR_HPUX_lwp_abort_syscall     395
+#define __NR_HPUX_lwp_info              396
+#define __NR_HPUX_lwp_kill              397
+#define __NR_HPUX_ksleep                398
+#define __NR_HPUX_kwakeup               399
+/* 400 missing */
+#define __NR_HPUX_pstat_getlwp          401
+#define __NR_HPUX_lwp_exit              402
+#define __NR_HPUX_lwp_continue          403
+#define __NR_HPUX_getacl                404
+#define __NR_HPUX_fgetacl               405
+#define __NR_HPUX_setacl                406
+#define __NR_HPUX_fsetacl               407
+#define __NR_HPUX_getaccess             408
+#define __NR_HPUX_lwp_mutex_init        409
+#define __NR_HPUX_lwp_mutex_lock_sys    410
+#define __NR_HPUX_lwp_mutex_unlock      411
+#define __NR_HPUX_lwp_cond_init         412
+#define __NR_HPUX_lwp_cond_signal       413
+#define __NR_HPUX_lwp_cond_broadcast    414
+#define __NR_HPUX_lwp_cond_wait_sys     415
+#define __NR_HPUX_lwp_getscheduler      416
+#define __NR_HPUX_lwp_setscheduler      417
+#define __NR_HPUX_lwp_getstate          418
+#define __NR_HPUX_lwp_setstate          419
+#define __NR_HPUX_lwp_detach            420
+#define __NR_HPUX_mlock                 421
+#define __NR_HPUX_munlock               422
+#define __NR_HPUX_mlockall              423
+#define __NR_HPUX_munlockall            424
+#define __NR_HPUX_shm_open              425
+#define __NR_HPUX_shm_unlink            426
+#define __NR_HPUX_sigqueue              427
+#define __NR_HPUX_sigwaitinfo           428
+#define __NR_HPUX_sigtimedwait          429
+#define __NR_HPUX_sigwait               430
+#define __NR_HPUX_aio_read              431
+#define __NR_HPUX_aio_write             432
+#define __NR_HPUX_lio_listio            433
+#define __NR_HPUX_aio_error             434
+#define __NR_HPUX_aio_return            435
+#define __NR_HPUX_aio_cancel            436
+#define __NR_HPUX_aio_suspend           437
+#define __NR_HPUX_aio_fsync             438
+#define __NR_HPUX_mq_open               439
+#define __NR_HPUX_mq_close              440
+#define __NR_HPUX_mq_unlink             441
+#define __NR_HPUX_mq_send               442
+#define __NR_HPUX_mq_receive            443
+#define __NR_HPUX_mq_notify             444
+#define __NR_HPUX_mq_setattr            445
+#define __NR_HPUX_mq_getattr            446
+#define __NR_HPUX_ksem_open             447
+#define __NR_HPUX_ksem_unlink           448
+#define __NR_HPUX_ksem_close            449
+#define __NR_HPUX_ksem_post             450
+#define __NR_HPUX_ksem_wait             451
+#define __NR_HPUX_ksem_read             452
+#define __NR_HPUX_ksem_trywait          453
+#define __NR_HPUX_lwp_rwlock_init       454
+#define __NR_HPUX_lwp_rwlock_destroy    455
+#define __NR_HPUX_lwp_rwlock_rdlock_sys 456
+#define __NR_HPUX_lwp_rwlock_wrlock_sys 457
+#define __NR_HPUX_lwp_rwlock_tryrdlock  458
+#define __NR_HPUX_lwp_rwlock_trywrlock  459
+#define __NR_HPUX_lwp_rwlock_unlock     460
+#define __NR_HPUX_ttrace                461
+#define __NR_HPUX_ttrace_wait           462
+#define __NR_HPUX_lf_wire_mem           463
+#define __NR_HPUX_lf_unwire_mem         464
+#define __NR_HPUX_lf_send_pin_map       465
+#define __NR_HPUX_lf_free_buf           466
+#define __NR_HPUX_lf_wait_nq            467
+#define __NR_HPUX_lf_wakeup_conn_q      468
+#define __NR_HPUX_lf_unused             469
+#define __NR_HPUX_lwp_sema_init         470
+#define __NR_HPUX_lwp_sema_post         471
+#define __NR_HPUX_lwp_sema_wait         472
+#define __NR_HPUX_lwp_sema_trywait      473
+#define __NR_HPUX_lwp_sema_destroy      474
+#define __NR_HPUX_statvfs64             475
+#define __NR_HPUX_fstatvfs64            476
+#define __NR_HPUX_msh_register          477
+#define __NR_HPUX_ptrace64              478
+#define __NR_HPUX_sendfile              479
+#define __NR_HPUX_sendpath              480
+#define __NR_HPUX_sendfile64            481
+#define __NR_HPUX_sendpath64            482
+#define __NR_HPUX_modload               483
+#define __NR_HPUX_moduload              484
+#define __NR_HPUX_modpath               485
+#define __NR_HPUX_getksym               486
+#define __NR_HPUX_modadm                487
+#define __NR_HPUX_modstat               488
+#define __NR_HPUX_lwp_detached_exit     489
+#define __NR_HPUX_crashconf             490
+#define __NR_HPUX_siginhibit            491
+#define __NR_HPUX_sigenable             492
+#define __NR_HPUX_spuctl                493
+#define __NR_HPUX_zerokernelsum         494
+#define __NR_HPUX_nfs_kstat             495
+#define __NR_HPUX_aio_read64            496
+#define __NR_HPUX_aio_write64           497
+#define __NR_HPUX_aio_error64           498
+#define __NR_HPUX_aio_return64          499
+#define __NR_HPUX_aio_cancel64          500
+#define __NR_HPUX_aio_suspend64         501
+#define __NR_HPUX_aio_fsync64           502
+#define __NR_HPUX_lio_listio64          503
+#define __NR_HPUX_recv2                 504
+#define __NR_HPUX_recvfrom2             505
+#define __NR_HPUX_send2                 506
+#define __NR_HPUX_sendto2               507
+#define __NR_HPUX_acl                   508
+#define __NR_HPUX___cnx_p2p_ctl         509
+#define __NR_HPUX___cnx_gsched_ctl      510
+#define __NR_HPUX___cnx_pmon_ctl        511
+
+#define __NR_HPUX_syscalls		512
+
+/*
+ * Linux system call numbers.
+ *
+ * Cary Coutant says that we should just use another syscall gateway
+ * page to avoid clashing with the HPUX space, and I think he's right:
+ * it will would keep a branch out of our syscall entry path, at the
+ * very least.  If we decide to change it later, we can ``just'' tweak
+ * the LINUX_GATEWAY_ADDR define at the bottom and make __NR_Linux be
+ * 1024 or something.  Oh, and recompile libc. =)
+ *
+ * 64-bit HPUX binaries get the syscall gateway address passed in a register
+ * from the kernel at startup, which seems a sane strategy.
+ */
+
+#define __NR_Linux                0
+#define __NR_restart_syscall      (__NR_Linux + 0)
+#define __NR_exit                 (__NR_Linux + 1)
+#define __NR_fork                 (__NR_Linux + 2)
+#define __NR_read                 (__NR_Linux + 3)
+#define __NR_write                (__NR_Linux + 4)
+#define __NR_open                 (__NR_Linux + 5)
+#define __NR_close                (__NR_Linux + 6)
+#define __NR_waitpid              (__NR_Linux + 7)
+#define __NR_creat                (__NR_Linux + 8)
+#define __NR_link                 (__NR_Linux + 9)
+#define __NR_unlink              (__NR_Linux + 10)
+#define __NR_execve              (__NR_Linux + 11)
+#define __NR_chdir               (__NR_Linux + 12)
+#define __NR_time                (__NR_Linux + 13)
+#define __NR_mknod               (__NR_Linux + 14)
+#define __NR_chmod               (__NR_Linux + 15)
+#define __NR_lchown              (__NR_Linux + 16)
+#define __NR_socket              (__NR_Linux + 17)
+#define __NR_stat                (__NR_Linux + 18)
+#define __NR_lseek               (__NR_Linux + 19)
+#define __NR_getpid              (__NR_Linux + 20)
+#define __NR_mount               (__NR_Linux + 21)
+#define __NR_bind                (__NR_Linux + 22)
+#define __NR_setuid              (__NR_Linux + 23)
+#define __NR_getuid              (__NR_Linux + 24)
+#define __NR_stime               (__NR_Linux + 25)
+#define __NR_ptrace              (__NR_Linux + 26)
+#define __NR_alarm               (__NR_Linux + 27)
+#define __NR_fstat               (__NR_Linux + 28)
+#define __NR_pause               (__NR_Linux + 29)
+#define __NR_utime               (__NR_Linux + 30)
+#define __NR_connect             (__NR_Linux + 31)
+#define __NR_listen              (__NR_Linux + 32)
+#define __NR_access              (__NR_Linux + 33)
+#define __NR_nice                (__NR_Linux + 34)
+#define __NR_accept              (__NR_Linux + 35)
+#define __NR_sync                (__NR_Linux + 36)
+#define __NR_kill                (__NR_Linux + 37)
+#define __NR_rename              (__NR_Linux + 38)
+#define __NR_mkdir               (__NR_Linux + 39)
+#define __NR_rmdir               (__NR_Linux + 40)
+#define __NR_dup                 (__NR_Linux + 41)
+#define __NR_pipe                (__NR_Linux + 42)
+#define __NR_times               (__NR_Linux + 43)
+#define __NR_getsockname         (__NR_Linux + 44)
+#define __NR_brk                 (__NR_Linux + 45)
+#define __NR_setgid              (__NR_Linux + 46)
+#define __NR_getgid              (__NR_Linux + 47)
+#define __NR_signal              (__NR_Linux + 48)
+#define __NR_geteuid             (__NR_Linux + 49)
+#define __NR_getegid             (__NR_Linux + 50)
+#define __NR_acct                (__NR_Linux + 51)
+#define __NR_umount2             (__NR_Linux + 52)
+#define __NR_getpeername         (__NR_Linux + 53)
+#define __NR_ioctl               (__NR_Linux + 54)
+#define __NR_fcntl               (__NR_Linux + 55)
+#define __NR_socketpair          (__NR_Linux + 56)
+#define __NR_setpgid             (__NR_Linux + 57)
+#define __NR_send                (__NR_Linux + 58)
+#define __NR_uname               (__NR_Linux + 59)
+#define __NR_umask               (__NR_Linux + 60)
+#define __NR_chroot              (__NR_Linux + 61)
+#define __NR_ustat               (__NR_Linux + 62)
+#define __NR_dup2                (__NR_Linux + 63)
+#define __NR_getppid             (__NR_Linux + 64)
+#define __NR_getpgrp             (__NR_Linux + 65)
+#define __NR_setsid              (__NR_Linux + 66)
+#define __NR_pivot_root          (__NR_Linux + 67)
+#define __NR_sgetmask            (__NR_Linux + 68)
+#define __NR_ssetmask            (__NR_Linux + 69)
+#define __NR_setreuid            (__NR_Linux + 70)
+#define __NR_setregid            (__NR_Linux + 71)
+#define __NR_mincore             (__NR_Linux + 72)
+#define __NR_sigpending          (__NR_Linux + 73)
+#define __NR_sethostname         (__NR_Linux + 74)
+#define __NR_setrlimit           (__NR_Linux + 75)
+#define __NR_getrlimit           (__NR_Linux + 76)
+#define __NR_getrusage           (__NR_Linux + 77)
+#define __NR_gettimeofday        (__NR_Linux + 78)
+#define __NR_settimeofday        (__NR_Linux + 79)
+#define __NR_getgroups           (__NR_Linux + 80)
+#define __NR_setgroups           (__NR_Linux + 81)
+#define __NR_sendto              (__NR_Linux + 82)
+#define __NR_symlink             (__NR_Linux + 83)
+#define __NR_lstat               (__NR_Linux + 84)
+#define __NR_readlink            (__NR_Linux + 85)
+#define __NR_uselib              (__NR_Linux + 86)
+#define __NR_swapon              (__NR_Linux + 87)
+#define __NR_reboot              (__NR_Linux + 88)
+#define __NR_mmap2             (__NR_Linux + 89)
+#define __NR_mmap                (__NR_Linux + 90)
+#define __NR_munmap              (__NR_Linux + 91)
+#define __NR_truncate            (__NR_Linux + 92)
+#define __NR_ftruncate           (__NR_Linux + 93)
+#define __NR_fchmod              (__NR_Linux + 94)
+#define __NR_fchown              (__NR_Linux + 95)
+#define __NR_getpriority         (__NR_Linux + 96)
+#define __NR_setpriority         (__NR_Linux + 97)
+#define __NR_recv                (__NR_Linux + 98)
+#define __NR_statfs              (__NR_Linux + 99)
+#define __NR_fstatfs            (__NR_Linux + 100)
+#define __NR_stat64           (__NR_Linux + 101)
+/* #define __NR_socketcall         (__NR_Linux + 102) */
+#define __NR_syslog             (__NR_Linux + 103)
+#define __NR_setitimer          (__NR_Linux + 104)
+#define __NR_getitimer          (__NR_Linux + 105)
+#define __NR_capget             (__NR_Linux + 106)
+#define __NR_capset             (__NR_Linux + 107)
+#define __NR_pread64            (__NR_Linux + 108)
+#define __NR_pwrite64           (__NR_Linux + 109)
+#define __NR_getcwd             (__NR_Linux + 110)
+#define __NR_vhangup            (__NR_Linux + 111)
+#define __NR_fstat64            (__NR_Linux + 112)
+#define __NR_vfork              (__NR_Linux + 113)
+#define __NR_wait4              (__NR_Linux + 114)
+#define __NR_swapoff            (__NR_Linux + 115)
+#define __NR_sysinfo            (__NR_Linux + 116)
+#define __NR_shutdown           (__NR_Linux + 117)
+#define __NR_fsync              (__NR_Linux + 118)
+#define __NR_madvise            (__NR_Linux + 119)
+#define __NR_clone              (__NR_Linux + 120)
+#define __NR_setdomainname      (__NR_Linux + 121)
+#define __NR_sendfile           (__NR_Linux + 122)
+#define __NR_recvfrom           (__NR_Linux + 123)
+#define __NR_adjtimex           (__NR_Linux + 124)
+#define __NR_mprotect           (__NR_Linux + 125)
+#define __NR_sigprocmask        (__NR_Linux + 126)
+#define __NR_create_module      (__NR_Linux + 127)
+#define __NR_init_module        (__NR_Linux + 128)
+#define __NR_delete_module      (__NR_Linux + 129)
+#define __NR_get_kernel_syms    (__NR_Linux + 130)
+#define __NR_quotactl           (__NR_Linux + 131)
+#define __NR_getpgid            (__NR_Linux + 132)
+#define __NR_fchdir             (__NR_Linux + 133)
+#define __NR_bdflush            (__NR_Linux + 134)
+#define __NR_sysfs              (__NR_Linux + 135)
+#define __NR_personality        (__NR_Linux + 136)
+#define __NR_afs_syscall        (__NR_Linux + 137) /* Syscall for Andrew File System */
+#define __NR_setfsuid           (__NR_Linux + 138)
+#define __NR_setfsgid           (__NR_Linux + 139)
+#define __NR__llseek            (__NR_Linux + 140)
+#define __NR_getdents           (__NR_Linux + 141)
+#define __NR__newselect         (__NR_Linux + 142)
+#define __NR_flock              (__NR_Linux + 143)
+#define __NR_msync              (__NR_Linux + 144)
+#define __NR_readv              (__NR_Linux + 145)
+#define __NR_writev             (__NR_Linux + 146)
+#define __NR_getsid             (__NR_Linux + 147)
+#define __NR_fdatasync          (__NR_Linux + 148)
+#define __NR__sysctl            (__NR_Linux + 149)
+#define __NR_mlock              (__NR_Linux + 150)
+#define __NR_munlock            (__NR_Linux + 151)
+#define __NR_mlockall           (__NR_Linux + 152)
+#define __NR_munlockall         (__NR_Linux + 153)
+#define __NR_sched_setparam             (__NR_Linux + 154)
+#define __NR_sched_getparam             (__NR_Linux + 155)
+#define __NR_sched_setscheduler         (__NR_Linux + 156)
+#define __NR_sched_getscheduler         (__NR_Linux + 157)
+#define __NR_sched_yield                (__NR_Linux + 158)
+#define __NR_sched_get_priority_max     (__NR_Linux + 159)
+#define __NR_sched_get_priority_min     (__NR_Linux + 160)
+#define __NR_sched_rr_get_interval      (__NR_Linux + 161)
+#define __NR_nanosleep          (__NR_Linux + 162)
+#define __NR_mremap             (__NR_Linux + 163)
+#define __NR_setresuid          (__NR_Linux + 164)
+#define __NR_getresuid          (__NR_Linux + 165)
+#define __NR_sigaltstack        (__NR_Linux + 166)
+#define __NR_query_module       (__NR_Linux + 167)
+#define __NR_poll               (__NR_Linux + 168)
+#define __NR_nfsservctl         (__NR_Linux + 169)
+#define __NR_setresgid          (__NR_Linux + 170)
+#define __NR_getresgid          (__NR_Linux + 171)
+#define __NR_prctl              (__NR_Linux + 172)
+#define __NR_rt_sigreturn       (__NR_Linux + 173)
+#define __NR_rt_sigaction       (__NR_Linux + 174)
+#define __NR_rt_sigprocmask     (__NR_Linux + 175)
+#define __NR_rt_sigpending      (__NR_Linux + 176)
+#define __NR_rt_sigtimedwait    (__NR_Linux + 177)
+#define __NR_rt_sigqueueinfo    (__NR_Linux + 178)
+#define __NR_rt_sigsuspend      (__NR_Linux + 179)
+#define __NR_chown              (__NR_Linux + 180)
+#define __NR_setsockopt         (__NR_Linux + 181)
+#define __NR_getsockopt         (__NR_Linux + 182)
+#define __NR_sendmsg            (__NR_Linux + 183)
+#define __NR_recvmsg            (__NR_Linux + 184)
+#define __NR_semop              (__NR_Linux + 185)
+#define __NR_semget             (__NR_Linux + 186)
+#define __NR_semctl             (__NR_Linux + 187)
+#define __NR_msgsnd             (__NR_Linux + 188)
+#define __NR_msgrcv             (__NR_Linux + 189)
+#define __NR_msgget             (__NR_Linux + 190)
+#define __NR_msgctl             (__NR_Linux + 191)
+#define __NR_shmat              (__NR_Linux + 192)
+#define __NR_shmdt              (__NR_Linux + 193)
+#define __NR_shmget             (__NR_Linux + 194)
+#define __NR_shmctl             (__NR_Linux + 195)
+
+#define __NR_getpmsg		(__NR_Linux + 196) /* Somebody *wants* streams? */
+#define __NR_putpmsg		(__NR_Linux + 197)
+
+#define __NR_lstat64            (__NR_Linux + 198)
+#define __NR_truncate64         (__NR_Linux + 199)
+#define __NR_ftruncate64        (__NR_Linux + 200)
+#define __NR_getdents64         (__NR_Linux + 201)
+#define __NR_fcntl64            (__NR_Linux + 202)
+#define __NR_attrctl            (__NR_Linux + 203)
+#define __NR_acl_get            (__NR_Linux + 204)
+#define __NR_acl_set            (__NR_Linux + 205)
+#define __NR_gettid             (__NR_Linux + 206)
+#define __NR_readahead          (__NR_Linux + 207)
+#define __NR_tkill              (__NR_Linux + 208)
+#define __NR_sendfile64         (__NR_Linux + 209)
+#define __NR_futex              (__NR_Linux + 210)
+#define __NR_sched_setaffinity  (__NR_Linux + 211)
+#define __NR_sched_getaffinity  (__NR_Linux + 212)
+#define __NR_set_thread_area    (__NR_Linux + 213)
+#define __NR_get_thread_area    (__NR_Linux + 214)
+#define __NR_io_setup           (__NR_Linux + 215)
+#define __NR_io_destroy         (__NR_Linux + 216)
+#define __NR_io_getevents       (__NR_Linux + 217)
+#define __NR_io_submit          (__NR_Linux + 218)
+#define __NR_io_cancel          (__NR_Linux + 219)
+#define __NR_alloc_hugepages    (__NR_Linux + 220)
+#define __NR_free_hugepages     (__NR_Linux + 221)
+#define __NR_exit_group         (__NR_Linux + 222)
+#define __NR_lookup_dcookie     (__NR_Linux + 223)
+#define __NR_epoll_create       (__NR_Linux + 224)
+#define __NR_epoll_ctl          (__NR_Linux + 225)
+#define __NR_epoll_wait         (__NR_Linux + 226)
+#define __NR_remap_file_pages   (__NR_Linux + 227)
+#define __NR_semtimedop         (__NR_Linux + 228)
+#define __NR_mq_open            (__NR_Linux + 229)
+#define __NR_mq_unlink          (__NR_Linux + 230)
+#define __NR_mq_timedsend       (__NR_Linux + 231)
+#define __NR_mq_timedreceive    (__NR_Linux + 232)
+#define __NR_mq_notify          (__NR_Linux + 233)
+#define __NR_mq_getsetattr      (__NR_Linux + 234)
+#define __NR_waitid		(__NR_Linux + 235)
+#define __NR_fadvise64_64	(__NR_Linux + 236)
+#define __NR_set_tid_address	(__NR_Linux + 237)
+#define __NR_setxattr		(__NR_Linux + 238)
+#define __NR_lsetxattr		(__NR_Linux + 239)
+#define __NR_fsetxattr		(__NR_Linux + 240)
+#define __NR_getxattr		(__NR_Linux + 241)
+#define __NR_lgetxattr		(__NR_Linux + 242)
+#define __NR_fgetxattr		(__NR_Linux + 243)
+#define __NR_listxattr		(__NR_Linux + 244)
+#define __NR_llistxattr		(__NR_Linux + 245)
+#define __NR_flistxattr		(__NR_Linux + 246)
+#define __NR_removexattr	(__NR_Linux + 247)
+#define __NR_lremovexattr	(__NR_Linux + 248)
+#define __NR_fremovexattr	(__NR_Linux + 249)
+#define __NR_timer_create	(__NR_Linux + 250)
+#define __NR_timer_settime	(__NR_Linux + 251)
+#define __NR_timer_gettime	(__NR_Linux + 252)
+#define __NR_timer_getoverrun	(__NR_Linux + 253)
+#define __NR_timer_delete	(__NR_Linux + 254)
+#define __NR_clock_settime	(__NR_Linux + 255)
+#define __NR_clock_gettime	(__NR_Linux + 256)
+#define __NR_clock_getres	(__NR_Linux + 257)
+#define __NR_clock_nanosleep	(__NR_Linux + 258)
+#define __NR_tgkill		(__NR_Linux + 259)
+#define __NR_mbind		(__NR_Linux + 260)
+#define __NR_get_mempolicy	(__NR_Linux + 261)
+#define __NR_set_mempolicy	(__NR_Linux + 262)
+#define __NR_vserver		(__NR_Linux + 263)
+#define __NR_add_key		(__NR_Linux + 264)
+#define __NR_request_key	(__NR_Linux + 265)
+#define __NR_keyctl		(__NR_Linux + 266)
+#define __NR_ioprio_set		(__NR_Linux + 267)
+#define __NR_ioprio_get		(__NR_Linux + 268)
+#define __NR_inotify_init	(__NR_Linux + 269)
+#define __NR_inotify_add_watch	(__NR_Linux + 270)
+#define __NR_inotify_rm_watch	(__NR_Linux + 271)
+#define __NR_migrate_pages	(__NR_Linux + 272)
+#define __NR_pselect6		(__NR_Linux + 273)
+#define __NR_ppoll		(__NR_Linux + 274)
+#define __NR_openat		(__NR_Linux + 275)
+#define __NR_mkdirat		(__NR_Linux + 276)
+#define __NR_mknodat		(__NR_Linux + 277)
+#define __NR_fchownat		(__NR_Linux + 278)
+#define __NR_futimesat		(__NR_Linux + 279)
+#define __NR_fstatat64		(__NR_Linux + 280)
+#define __NR_unlinkat		(__NR_Linux + 281)
+#define __NR_renameat		(__NR_Linux + 282)
+#define __NR_linkat		(__NR_Linux + 283)
+#define __NR_symlinkat		(__NR_Linux + 284)
+#define __NR_readlinkat		(__NR_Linux + 285)
+#define __NR_fchmodat		(__NR_Linux + 286)
+#define __NR_faccessat		(__NR_Linux + 287)
+#define __NR_unshare		(__NR_Linux + 288)
+#define __NR_set_robust_list	(__NR_Linux + 289)
+#define __NR_get_robust_list	(__NR_Linux + 290)
+#define __NR_splice		(__NR_Linux + 291)
+#define __NR_sync_file_range	(__NR_Linux + 292)
+#define __NR_tee		(__NR_Linux + 293)
+#define __NR_vmsplice		(__NR_Linux + 294)
+#define __NR_move_pages		(__NR_Linux + 295)
+#define __NR_getcpu		(__NR_Linux + 296)
+#define __NR_epoll_pwait	(__NR_Linux + 297)
+#define __NR_statfs64		(__NR_Linux + 298)
+#define __NR_fstatfs64		(__NR_Linux + 299)
+#define __NR_kexec_load		(__NR_Linux + 300)
+#define __NR_utimensat		(__NR_Linux + 301)
+#define __NR_signalfd		(__NR_Linux + 302)
+#define __NR_timerfd		(__NR_Linux + 303)
+#define __NR_eventfd		(__NR_Linux + 304)
+#define __NR_fallocate		(__NR_Linux + 305)
+#define __NR_timerfd_create	(__NR_Linux + 306)
+#define __NR_timerfd_settime	(__NR_Linux + 307)
+#define __NR_timerfd_gettime	(__NR_Linux + 308)
+
+#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
+
+
+#define __IGNORE_select		/* newselect */
+#define __IGNORE_fadvise64	/* fadvise64_64 */
+#define __IGNORE_utimes		/* utime */
+
+
+#define HPUX_GATEWAY_ADDR       0xC0000004
+#define LINUX_GATEWAY_ADDR      0x100
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#define SYS_ify(syscall_name)   __NR_##syscall_name
+
+#ifndef ASM_LINE_SEP
+# define ASM_LINE_SEP ;
+#endif
+
+/* Definition taken from glibc 2.3.3
+ * sysdeps/unix/sysv/linux/hppa/sysdep.h
+ */
+
+#ifdef PIC
+/* WARNING: CANNOT BE USED IN A NOP! */
+# define K_STW_ASM_PIC	"       copy %%r19, %%r4\n"
+# define K_LDW_ASM_PIC	"       copy %%r4, %%r19\n"
+# define K_USING_GR4	"%r4",
+#else
+# define K_STW_ASM_PIC	" \n"
+# define K_LDW_ASM_PIC	" \n"
+# define K_USING_GR4
+#endif
+
+/* GCC has to be warned that a syscall may clobber all the ABI
+   registers listed as "caller-saves", see page 8, Table 2
+   in section 2.2.6 of the PA-RISC RUN-TIME architecture
+   document. However! r28 is the result and will conflict with
+   the clobber list so it is left out. Also the input arguments
+   registers r20 -> r26 will conflict with the list so they
+   are treated specially. Although r19 is clobbered by the syscall
+   we cannot say this because it would violate ABI, thus we say
+   r4 is clobbered and use that register to save/restore r19
+   across the syscall. */
+
+#define K_CALL_CLOB_REGS "%r1", "%r2", K_USING_GR4 \
+	        	 "%r20", "%r29", "%r31"
+
+#undef K_INLINE_SYSCALL
+#define K_INLINE_SYSCALL(name, nr, args...)	({			\
+	long __sys_res;							\
+	{								\
+		register unsigned long __res __asm__("r28");		\
+		K_LOAD_ARGS_##nr(args)					\
+		/* FIXME: HACK stw/ldw r19 around syscall */		\
+		__asm__ volatile(					\
+			K_STW_ASM_PIC					\
+			"	ble  0x100(%%sr2, %%r0)\n"		\
+			"	ldi %1, %%r20\n"			\
+			K_LDW_ASM_PIC					\
+			: "=r" (__res)					\
+			: "i" (SYS_ify(name)) K_ASM_ARGS_##nr   	\
+			: "memory", K_CALL_CLOB_REGS K_CLOB_ARGS_##nr	\
+		);							\
+		__sys_res = (long)__res;				\
+	}								\
+	if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){	\
+		errno = -__sys_res;		        		\
+		__sys_res = -1;						\
+	}								\
+	__sys_res;							\
+})
+
+#define K_LOAD_ARGS_0()
+#define K_LOAD_ARGS_1(r26)					\
+	register unsigned long __r26 __asm__("r26") = (unsigned long)(r26);   \
+	K_LOAD_ARGS_0()
+#define K_LOAD_ARGS_2(r26,r25)					\
+	register unsigned long __r25 __asm__("r25") = (unsigned long)(r25);   \
+	K_LOAD_ARGS_1(r26)
+#define K_LOAD_ARGS_3(r26,r25,r24)				\
+	register unsigned long __r24 __asm__("r24") = (unsigned long)(r24);   \
+	K_LOAD_ARGS_2(r26,r25)
+#define K_LOAD_ARGS_4(r26,r25,r24,r23)				\
+	register unsigned long __r23 __asm__("r23") = (unsigned long)(r23);   \
+	K_LOAD_ARGS_3(r26,r25,r24)
+#define K_LOAD_ARGS_5(r26,r25,r24,r23,r22)			\
+	register unsigned long __r22 __asm__("r22") = (unsigned long)(r22);   \
+	K_LOAD_ARGS_4(r26,r25,r24,r23)
+#define K_LOAD_ARGS_6(r26,r25,r24,r23,r22,r21)			\
+	register unsigned long __r21 __asm__("r21") = (unsigned long)(r21);   \
+	K_LOAD_ARGS_5(r26,r25,r24,r23,r22)
+
+/* Even with zero args we use r20 for the syscall number */
+#define K_ASM_ARGS_0
+#define K_ASM_ARGS_1 K_ASM_ARGS_0, "r" (__r26)
+#define K_ASM_ARGS_2 K_ASM_ARGS_1, "r" (__r25)
+#define K_ASM_ARGS_3 K_ASM_ARGS_2, "r" (__r24)
+#define K_ASM_ARGS_4 K_ASM_ARGS_3, "r" (__r23)
+#define K_ASM_ARGS_5 K_ASM_ARGS_4, "r" (__r22)
+#define K_ASM_ARGS_6 K_ASM_ARGS_5, "r" (__r21)
+
+/* The registers not listed as inputs but clobbered */
+#define K_CLOB_ARGS_6
+#define K_CLOB_ARGS_5 K_CLOB_ARGS_6, "%r21"
+#define K_CLOB_ARGS_4 K_CLOB_ARGS_5, "%r22"
+#define K_CLOB_ARGS_3 K_CLOB_ARGS_4, "%r23"
+#define K_CLOB_ARGS_2 K_CLOB_ARGS_3, "%r24"
+#define K_CLOB_ARGS_1 K_CLOB_ARGS_2, "%r25"
+#define K_CLOB_ARGS_0 K_CLOB_ARGS_1, "%r26"
+
+#define _syscall0(type,name)						\
+type name(void)								\
+{									\
+    return K_INLINE_SYSCALL(name, 0);	                                \
+}
+
+#define _syscall1(type,name,type1,arg1)					\
+type name(type1 arg1)							\
+{									\
+    return K_INLINE_SYSCALL(name, 1, arg1);	                        \
+}
+
+#define _syscall2(type,name,type1,arg1,type2,arg2)			\
+type name(type1 arg1, type2 arg2)					\
+{									\
+    return K_INLINE_SYSCALL(name, 2, arg1, arg2);	                \
+}
+
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)		\
+type name(type1 arg1, type2 arg2, type3 arg3)				\
+{									\
+    return K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3);	                \
+}
+
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4)		\
+{									\
+    return K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4);	        \
+}
+
+/* select takes 5 arguments */
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
+{									\
+    return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5);	\
+}
+
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SGETMASK
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
+
+#endif /* __ASSEMBLY__ */
+
+#undef STR
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_PARISC_UNISTD_H_ */
diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h
new file mode 100644
index 000000000000..2f7e6e50a158
--- /dev/null
+++ b/arch/parisc/include/asm/unwind.h
@@ -0,0 +1,77 @@
+#ifndef _UNWIND_H_
+#define _UNWIND_H_
+
+#include <linux/list.h>
+
+/* From ABI specifications */
+struct unwind_table_entry {
+	unsigned int region_start;
+	unsigned int region_end;
+	unsigned int Cannot_unwind:1; /* 0 */
+	unsigned int Millicode:1;	/* 1 */
+	unsigned int Millicode_save_sr0:1;	/* 2 */
+	unsigned int Region_description:2;	/* 3..4 */
+	unsigned int reserved1:1;	/* 5 */
+	unsigned int Entry_SR:1;	/* 6 */
+	unsigned int Entry_FR:4;	/* number saved *//* 7..10 */
+	unsigned int Entry_GR:5;	/* number saved *//* 11..15 */
+	unsigned int Args_stored:1;	/* 16 */
+	unsigned int Variable_Frame:1;	/* 17 */
+	unsigned int Separate_Package_Body:1;	/* 18 */
+	unsigned int Frame_Extension_Millicode:1;	/* 19 */
+	unsigned int Stack_Overflow_Check:1;	/* 20 */
+	unsigned int Two_Instruction_SP_Increment:1;	/* 21 */
+	unsigned int Ada_Region:1;	/* 22 */
+	unsigned int cxx_info:1;	/* 23 */
+	unsigned int cxx_try_catch:1;	/* 24 */
+	unsigned int sched_entry_seq:1;	/* 25 */
+	unsigned int reserved2:1;	/* 26 */
+	unsigned int Save_SP:1;	/* 27 */
+	unsigned int Save_RP:1;	/* 28 */
+	unsigned int Save_MRP_in_frame:1;	/* 29 */
+	unsigned int extn_ptr_defined:1;	/* 30 */
+	unsigned int Cleanup_defined:1;	/* 31 */
+	
+	unsigned int MPE_XL_interrupt_marker:1;	/* 0 */
+	unsigned int HP_UX_interrupt_marker:1;	/* 1 */
+	unsigned int Large_frame:1;	/* 2 */
+	unsigned int Pseudo_SP_Set:1;	/* 3 */
+	unsigned int reserved4:1;	/* 4 */
+	unsigned int Total_frame_size:27;	/* 5..31 */
+};
+
+struct unwind_table {
+	struct list_head list;
+	const char *name;
+	unsigned long gp;
+	unsigned long base_addr;
+	unsigned long start;
+	unsigned long end;
+	const struct unwind_table_entry *table;
+	unsigned long length;
+};
+
+struct unwind_frame_info {
+	struct task_struct *t;
+	/* Eventually we would like to be able to get at any of the registers
+	   available; but for now we only try to get the sp and ip for each
+	   frame */
+	/* struct pt_regs regs; */
+	unsigned long sp, ip, rp, r31;
+	unsigned long prev_sp, prev_ip;
+};
+
+struct unwind_table *
+unwind_table_add(const char *name, unsigned long base_addr, 
+		 unsigned long gp, void *start, void *end);
+void
+unwind_table_remove(struct unwind_table *table);
+
+void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t, 
+		       struct pt_regs *regs);
+void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t);
+void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs);
+int unwind_once(struct unwind_frame_info *info);
+int unwind_to_user(struct unwind_frame_info *info);
+
+#endif
diff --git a/arch/parisc/include/asm/user.h b/arch/parisc/include/asm/user.h
new file mode 100644
index 000000000000..80224753e508
--- /dev/null
+++ b/arch/parisc/include/asm/user.h
@@ -0,0 +1,5 @@
+/* This file should not exist, but lots of generic code still includes
+   it. It's a hangover from old a.out days and the traditional core
+   dump format.  We are ELF-only, and so are our core dumps.  If we
+   need to support HP/UX core format then we'll do it here
+   eventually. */
diff --git a/arch/parisc/include/asm/vga.h b/arch/parisc/include/asm/vga.h
new file mode 100644
index 000000000000..171399a88ca6
--- /dev/null
+++ b/arch/parisc/include/asm/vga.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_PARISC_VGA_H__
+#define __ASM_PARISC_VGA_H__
+
+/* nothing */
+
+#endif /* __ASM_PARISC_VGA_H__ */
diff --git a/arch/parisc/include/asm/xor.h b/arch/parisc/include/asm/xor.h
new file mode 100644
index 000000000000..c82eb12a5b18
--- /dev/null
+++ b/arch/parisc/include/asm/xor.h
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
diff --git a/include/asm-parisc/Kbuild b/include/asm-parisc/Kbuild
deleted file mode 100644
index f88b252e419c..000000000000
--- a/include/asm-parisc/Kbuild
+++ /dev/null
@@ -1,3 +0,0 @@
-include include/asm-generic/Kbuild.asm
-
-unifdef-y += pdc.h
diff --git a/include/asm-parisc/a.out.h b/include/asm-parisc/a.out.h
deleted file mode 100644
index eb04e34c5bb1..000000000000
--- a/include/asm-parisc/a.out.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __PARISC_A_OUT_H__
-#define __PARISC_A_OUT_H__
-
-struct exec
-{
-  unsigned int a_info;		/* Use macros N_MAGIC, etc for access */
-  unsigned a_text;		/* length of text, in bytes */
-  unsigned a_data;		/* length of data, in bytes */
-  unsigned a_bss;		/* length of uninitialized data area for file, in bytes */
-  unsigned a_syms;		/* length of symbol table data in file, in bytes */
-  unsigned a_entry;		/* start address */
-  unsigned a_trsize;		/* length of relocation info for text, in bytes */
-  unsigned a_drsize;		/* length of relocation info for data, in bytes */
-};
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-#endif /* __A_OUT_GNU_H__ */
diff --git a/include/asm-parisc/agp.h b/include/asm-parisc/agp.h
deleted file mode 100644
index 9651660da639..000000000000
--- a/include/asm-parisc/agp.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ASM_PARISC_AGP_H
-#define _ASM_PARISC_AGP_H
-
-/*
- * PARISC specific AGP definitions.
- * Copyright (c) 2006 Kyle McMartin <kyle@parisc-linux.org>
- *
- */
-
-#define map_page_into_agp(page)		/* nothing */
-#define unmap_page_from_agp(page)	/* nothing */
-#define flush_agp_cache()		mb()
-
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) (x)
-#define gart_to_phys(x) (x)
-
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order)		\
-	((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order)	\
-	free_pages((unsigned long)(table), (order))
-
-#endif /* _ASM_PARISC_AGP_H */
diff --git a/include/asm-parisc/asmregs.h b/include/asm-parisc/asmregs.h
deleted file mode 100644
index d93c646e1887..000000000000
--- a/include/asm-parisc/asmregs.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2, or (at your option)
- *	any later version.
- *
- *	This program is distributed in the hope that it will be useful,
- *	but WITHOUT ANY WARRANTY; without even the implied warranty of
- *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *	GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _PARISC_ASMREGS_H
-#define _PARISC_ASMREGS_H
-
-;! General Registers
-
-rp:	.reg	%r2
-arg3:	.reg	%r23
-arg2:	.reg	%r24
-arg1:	.reg	%r25
-arg0:	.reg	%r26
-dp:	.reg	%r27
-ret0:	.reg	%r28
-ret1:	.reg	%r29
-sl:	.reg	%r29
-sp:	.reg	%r30
-
-#if 0
-/* PA20_REVISIT */
-arg7:	.reg	r19
-arg6:	.reg	r20
-arg5:	.reg	r21
-arg4:	.reg	r22
-gp:	.reg	r27
-ap:	.reg	r29
-#endif
-
-
-r0:	.reg	%r0
-r1:	.reg	%r1
-r2:	.reg	%r2
-r3:	.reg	%r3
-r4:	.reg	%r4
-r5:	.reg	%r5
-r6:	.reg	%r6
-r7:	.reg	%r7
-r8:	.reg	%r8
-r9:	.reg	%r9
-r10:	.reg	%r10
-r11:	.reg	%r11
-r12:	.reg	%r12
-r13:	.reg	%r13
-r14:	.reg	%r14
-r15:	.reg	%r15
-r16:	.reg	%r16
-r17:	.reg	%r17
-r18:	.reg	%r18
-r19:	.reg	%r19
-r20:	.reg	%r20
-r21:	.reg	%r21
-r22:	.reg	%r22
-r23:	.reg	%r23
-r24:	.reg	%r24
-r25:	.reg	%r25
-r26:	.reg	%r26
-r27:	.reg	%r27
-r28:	.reg	%r28
-r29:	.reg	%r29
-r30:	.reg	%r30
-r31:	.reg	%r31
-
-
-;! Space Registers
-
-sr0:	.reg	%sr0
-sr1:	.reg	%sr1
-sr2:	.reg	%sr2
-sr3:	.reg	%sr3
-sr4:	.reg	%sr4
-sr5:	.reg	%sr5
-sr6:	.reg	%sr6
-sr7:	.reg	%sr7
-
-
-;! Floating Point Registers
-
-fr0:	.reg	%fr0
-fr1:	.reg	%fr1
-fr2:	.reg	%fr2
-fr3:	.reg	%fr3
-fr4:	.reg	%fr4
-fr5:	.reg	%fr5
-fr6:	.reg	%fr6
-fr7:	.reg	%fr7
-fr8:	.reg	%fr8
-fr9:	.reg	%fr9
-fr10:	.reg	%fr10
-fr11:	.reg	%fr11
-fr12:	.reg	%fr12
-fr13:	.reg	%fr13
-fr14:	.reg	%fr14
-fr15:	.reg	%fr15
-fr16:	.reg	%fr16
-fr17:	.reg	%fr17
-fr18:	.reg	%fr18
-fr19:	.reg	%fr19
-fr20:	.reg	%fr20
-fr21:	.reg	%fr21
-fr22:	.reg	%fr22
-fr23:	.reg	%fr23
-fr24:	.reg	%fr24
-fr25:	.reg	%fr25
-fr26:	.reg	%fr26
-fr27:	.reg	%fr27
-fr28:	.reg	%fr28
-fr29:	.reg	%fr29
-fr30:	.reg	%fr30
-fr31:	.reg	%fr31
-
-
-;! Control Registers
-
-rctr:	.reg	%cr0
-pidr1:	.reg	%cr8
-pidr2:	.reg	%cr9
-ccr:	.reg	%cr10
-sar:	.reg	%cr11
-pidr3:	.reg	%cr12
-pidr4:	.reg	%cr13
-iva:	.reg	%cr14
-eiem:	.reg	%cr15
-itmr:	.reg	%cr16
-pcsq:	.reg	%cr17
-pcoq:	.reg	%cr18
-iir:	.reg	%cr19
-isr:	.reg	%cr20
-ior:	.reg	%cr21
-ipsw:	.reg	%cr22
-eirr:	.reg	%cr23
-tr0:	.reg	%cr24
-tr1:	.reg	%cr25
-tr2:	.reg	%cr26
-tr3:	.reg	%cr27
-tr4:	.reg	%cr28
-tr5:	.reg	%cr29
-tr6:	.reg	%cr30
-tr7:	.reg	%cr31
-
-
-cr0:	.reg	%cr0
-cr8:	.reg	%cr8
-cr9:	.reg	%cr9
-cr10:	.reg	%cr10
-cr11:	.reg	%cr11
-cr12:	.reg	%cr12
-cr13:	.reg	%cr13
-cr14:	.reg	%cr14
-cr15:	.reg	%cr15
-cr16:	.reg	%cr16
-cr17:	.reg	%cr17
-cr18:	.reg	%cr18
-cr19:	.reg	%cr19
-cr20:	.reg	%cr20
-cr21:	.reg	%cr21
-cr22:	.reg	%cr22
-cr23:	.reg	%cr23
-cr24:	.reg	%cr24
-cr25:	.reg	%cr25
-cr26:	.reg	%cr26
-cr27:	.reg	%cr27
-cr28:	.reg	%cr28
-cr29:	.reg	%cr29
-cr30:	.reg	%cr30
-cr31:	.reg	%cr31
-
-#endif
diff --git a/include/asm-parisc/assembly.h b/include/asm-parisc/assembly.h
deleted file mode 100644
index ffb208840ecc..000000000000
--- a/include/asm-parisc/assembly.h
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
- * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
- * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
- * Copyright (C) 1999 SuSE GmbH
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2, or (at your option)
- *    any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _PARISC_ASSEMBLY_H
-#define _PARISC_ASSEMBLY_H
-
-#define CALLEE_FLOAT_FRAME_SIZE	80
-
-#ifdef CONFIG_64BIT
-#define LDREG	ldd
-#define STREG	std
-#define LDREGX  ldd,s
-#define LDREGM	ldd,mb
-#define STREGM	std,ma
-#define SHRREG	shrd
-#define SHLREG	shld
-#define ANDCM   andcm,*
-#define	COND(x)	* ## x
-#define RP_OFFSET	16
-#define FRAME_SIZE	128
-#define CALLEE_REG_FRAME_SIZE	144
-#define ASM_ULONG_INSN	.dword
-#else	/* CONFIG_64BIT */
-#define LDREG	ldw
-#define STREG	stw
-#define LDREGX  ldwx,s
-#define LDREGM	ldwm
-#define STREGM	stwm
-#define SHRREG	shr
-#define SHLREG	shlw
-#define ANDCM   andcm
-#define COND(x)	x
-#define RP_OFFSET	20
-#define FRAME_SIZE	64
-#define CALLEE_REG_FRAME_SIZE	128
-#define ASM_ULONG_INSN	.word
-#endif
-
-#define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
-
-#ifdef CONFIG_PA20
-#define LDCW		ldcw,co
-#define BL		b,l
-# ifdef CONFIG_64BIT
-#  define LEVEL		2.0w
-# else
-#  define LEVEL		2.0
-# endif
-#else
-#define LDCW		ldcw
-#define BL		bl
-#define LEVEL		1.1
-#endif
-
-#ifdef __ASSEMBLY__
-
-#ifdef CONFIG_64BIT
-/* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
- * work around that for now... */
-	.level 2.0w
-#endif
-
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-
-#include <asm/asmregs.h>
-
-	sp	=	30
-	gp	=	27
-	ipsw	=	22
-
-	/*
-	 * We provide two versions of each macro to convert from physical
-	 * to virtual and vice versa. The "_r1" versions take one argument
-	 * register, but trashes r1 to do the conversion. The other
-	 * version takes two arguments: a src and destination register.
-	 * However, the source and destination registers can not be
-	 * the same register.
-	 */
-
-	.macro  tophys  grvirt, grphys
-	ldil    L%(__PAGE_OFFSET), \grphys
-	sub     \grvirt, \grphys, \grphys
-	.endm
-	
-	.macro  tovirt  grphys, grvirt
-	ldil    L%(__PAGE_OFFSET), \grvirt
-	add     \grphys, \grvirt, \grvirt
-	.endm
-
-	.macro  tophys_r1  gr
-	ldil    L%(__PAGE_OFFSET), %r1
-	sub     \gr, %r1, \gr
-	.endm
-	
-	.macro  tovirt_r1  gr
-	ldil    L%(__PAGE_OFFSET), %r1
-	add     \gr, %r1, \gr
-	.endm
-
-	.macro delay value
-	ldil	L%\value, 1
-	ldo	R%\value(1), 1
-	addib,UV,n -1,1,.
-	addib,NUV,n -1,1,.+8
-	nop
-	.endm
-
-	.macro	debug value
-	.endm
-
-
-	/* Shift Left - note the r and t can NOT be the same! */
-	.macro shl r, sa, t
-	dep,z	\r, 31-\sa, 32-\sa, \t
-	.endm
-
-	/* The PA 2.0 shift left */
-	.macro shlw r, sa, t
-	depw,z	\r, 31-\sa, 32-\sa, \t
-	.endm
-
-	/* And the PA 2.0W shift left */
-	.macro shld r, sa, t
-	depd,z	\r, 63-\sa, 64-\sa, \t
-	.endm
-
-	/* Shift Right - note the r and t can NOT be the same! */
-	.macro shr r, sa, t
-	extru \r, 31-\sa, 32-\sa, \t
-	.endm
-
-	/* pa20w version of shift right */
-	.macro shrd r, sa, t
-	extrd,u \r, 63-\sa, 64-\sa, \t
-	.endm
-
-	/* load 32-bit 'value' into 'reg' compensating for the ldil
-	 * sign-extension when running in wide mode.
-	 * WARNING!! neither 'value' nor 'reg' can be expressions
-	 * containing '.'!!!! */
-	.macro	load32 value, reg
-	ldil	L%\value, \reg
-	ldo	R%\value(\reg), \reg
-	.endm
-
-	.macro loadgp
-#ifdef CONFIG_64BIT
-	ldil		L%__gp, %r27
-	ldo		R%__gp(%r27), %r27
-#else
-	ldil		L%$global$, %r27
-	ldo		R%$global$(%r27), %r27
-#endif
-	.endm
-
-#define SAVE_SP(r, where) mfsp r, %r1 ! STREG %r1, where
-#define REST_SP(r, where) LDREG where, %r1 ! mtsp %r1, r
-#define SAVE_CR(r, where) mfctl r, %r1 ! STREG %r1, where
-#define REST_CR(r, where) LDREG where, %r1 ! mtctl %r1, r
-
-	.macro	save_general	regs
-	STREG %r1, PT_GR1 (\regs)
-	STREG %r2, PT_GR2 (\regs)
-	STREG %r3, PT_GR3 (\regs)
-	STREG %r4, PT_GR4 (\regs)
-	STREG %r5, PT_GR5 (\regs)
-	STREG %r6, PT_GR6 (\regs)
-	STREG %r7, PT_GR7 (\regs)
-	STREG %r8, PT_GR8 (\regs)
-	STREG %r9, PT_GR9 (\regs)
-	STREG %r10, PT_GR10(\regs)
-	STREG %r11, PT_GR11(\regs)
-	STREG %r12, PT_GR12(\regs)
-	STREG %r13, PT_GR13(\regs)
-	STREG %r14, PT_GR14(\regs)
-	STREG %r15, PT_GR15(\regs)
-	STREG %r16, PT_GR16(\regs)
-	STREG %r17, PT_GR17(\regs)
-	STREG %r18, PT_GR18(\regs)
-	STREG %r19, PT_GR19(\regs)
-	STREG %r20, PT_GR20(\regs)
-	STREG %r21, PT_GR21(\regs)
-	STREG %r22, PT_GR22(\regs)
-	STREG %r23, PT_GR23(\regs)
-	STREG %r24, PT_GR24(\regs)
-	STREG %r25, PT_GR25(\regs)
-	/* r26 is saved in get_stack and used to preserve a value across virt_map */
-	STREG %r27, PT_GR27(\regs)
-	STREG %r28, PT_GR28(\regs)
-	/* r29 is saved in get_stack and used to point to saved registers */
-	/* r30 stack pointer saved in get_stack */
-	STREG %r31, PT_GR31(\regs)
-	.endm
-
-	.macro	rest_general	regs
-	/* r1 used as a temp in rest_stack and is restored there */
-	LDREG PT_GR2 (\regs), %r2
-	LDREG PT_GR3 (\regs), %r3
-	LDREG PT_GR4 (\regs), %r4
-	LDREG PT_GR5 (\regs), %r5
-	LDREG PT_GR6 (\regs), %r6
-	LDREG PT_GR7 (\regs), %r7
-	LDREG PT_GR8 (\regs), %r8
-	LDREG PT_GR9 (\regs), %r9
-	LDREG PT_GR10(\regs), %r10
-	LDREG PT_GR11(\regs), %r11
-	LDREG PT_GR12(\regs), %r12
-	LDREG PT_GR13(\regs), %r13
-	LDREG PT_GR14(\regs), %r14
-	LDREG PT_GR15(\regs), %r15
-	LDREG PT_GR16(\regs), %r16
-	LDREG PT_GR17(\regs), %r17
-	LDREG PT_GR18(\regs), %r18
-	LDREG PT_GR19(\regs), %r19
-	LDREG PT_GR20(\regs), %r20
-	LDREG PT_GR21(\regs), %r21
-	LDREG PT_GR22(\regs), %r22
-	LDREG PT_GR23(\regs), %r23
-	LDREG PT_GR24(\regs), %r24
-	LDREG PT_GR25(\regs), %r25
-	LDREG PT_GR26(\regs), %r26
-	LDREG PT_GR27(\regs), %r27
-	LDREG PT_GR28(\regs), %r28
-	/* r29 points to register save area, and is restored in rest_stack */
-	/* r30 stack pointer restored in rest_stack */
-	LDREG PT_GR31(\regs), %r31
-	.endm
-
-	.macro	save_fp 	regs
-	fstd,ma  %fr0, 8(\regs)
-	fstd,ma	 %fr1, 8(\regs)
-	fstd,ma	 %fr2, 8(\regs)
-	fstd,ma	 %fr3, 8(\regs)
-	fstd,ma	 %fr4, 8(\regs)
-	fstd,ma	 %fr5, 8(\regs)
-	fstd,ma	 %fr6, 8(\regs)
-	fstd,ma	 %fr7, 8(\regs)
-	fstd,ma	 %fr8, 8(\regs)
-	fstd,ma	 %fr9, 8(\regs)
-	fstd,ma	%fr10, 8(\regs)
-	fstd,ma	%fr11, 8(\regs)
-	fstd,ma	%fr12, 8(\regs)
-	fstd,ma	%fr13, 8(\regs)
-	fstd,ma	%fr14, 8(\regs)
-	fstd,ma	%fr15, 8(\regs)
-	fstd,ma	%fr16, 8(\regs)
-	fstd,ma	%fr17, 8(\regs)
-	fstd,ma	%fr18, 8(\regs)
-	fstd,ma	%fr19, 8(\regs)
-	fstd,ma	%fr20, 8(\regs)
-	fstd,ma	%fr21, 8(\regs)
-	fstd,ma	%fr22, 8(\regs)
-	fstd,ma	%fr23, 8(\regs)
-	fstd,ma	%fr24, 8(\regs)
-	fstd,ma	%fr25, 8(\regs)
-	fstd,ma	%fr26, 8(\regs)
-	fstd,ma	%fr27, 8(\regs)
-	fstd,ma	%fr28, 8(\regs)
-	fstd,ma	%fr29, 8(\regs)
-	fstd,ma	%fr30, 8(\regs)
-	fstd	%fr31, 0(\regs)
-	.endm
-
-	.macro	rest_fp 	regs
-	fldd	0(\regs),	 %fr31
-	fldd,mb	-8(\regs),       %fr30
-	fldd,mb	-8(\regs),       %fr29
-	fldd,mb	-8(\regs),       %fr28
-	fldd,mb	-8(\regs),       %fr27
-	fldd,mb	-8(\regs),       %fr26
-	fldd,mb	-8(\regs),       %fr25
-	fldd,mb	-8(\regs),       %fr24
-	fldd,mb	-8(\regs),       %fr23
-	fldd,mb	-8(\regs),       %fr22
-	fldd,mb	-8(\regs),       %fr21
-	fldd,mb	-8(\regs),       %fr20
-	fldd,mb	-8(\regs),       %fr19
-	fldd,mb	-8(\regs),       %fr18
-	fldd,mb	-8(\regs),       %fr17
-	fldd,mb	-8(\regs),       %fr16
-	fldd,mb	-8(\regs),       %fr15
-	fldd,mb	-8(\regs),       %fr14
-	fldd,mb	-8(\regs),       %fr13
-	fldd,mb	-8(\regs),       %fr12
-	fldd,mb	-8(\regs),       %fr11
-	fldd,mb	-8(\regs),       %fr10
-	fldd,mb	-8(\regs),       %fr9
-	fldd,mb	-8(\regs),       %fr8
-	fldd,mb	-8(\regs),       %fr7
-	fldd,mb	-8(\regs),       %fr6
-	fldd,mb	-8(\regs),       %fr5
-	fldd,mb	-8(\regs),       %fr4
-	fldd,mb	-8(\regs),       %fr3
-	fldd,mb	-8(\regs),       %fr2
-	fldd,mb	-8(\regs),       %fr1
-	fldd,mb	-8(\regs),       %fr0
-	.endm
-
-	.macro	callee_save_float
-	fstd,ma	 %fr12,	8(%r30)
-	fstd,ma	 %fr13,	8(%r30)
-	fstd,ma	 %fr14,	8(%r30)
-	fstd,ma	 %fr15,	8(%r30)
-	fstd,ma	 %fr16,	8(%r30)
-	fstd,ma	 %fr17,	8(%r30)
-	fstd,ma	 %fr18,	8(%r30)
-	fstd,ma	 %fr19,	8(%r30)
-	fstd,ma	 %fr20,	8(%r30)
-	fstd,ma	 %fr21,	8(%r30)
-	.endm
-
-	.macro	callee_rest_float
-	fldd,mb	-8(%r30),   %fr21
-	fldd,mb	-8(%r30),   %fr20
-	fldd,mb	-8(%r30),   %fr19
-	fldd,mb	-8(%r30),   %fr18
-	fldd,mb	-8(%r30),   %fr17
-	fldd,mb	-8(%r30),   %fr16
-	fldd,mb	-8(%r30),   %fr15
-	fldd,mb	-8(%r30),   %fr14
-	fldd,mb	-8(%r30),   %fr13
-	fldd,mb	-8(%r30),   %fr12
-	.endm
-
-#ifdef CONFIG_64BIT
-	.macro	callee_save
-	std,ma	  %r3,	 CALLEE_REG_FRAME_SIZE(%r30)
-	mfctl	  %cr27, %r3
-	std	  %r4,	-136(%r30)
-	std	  %r5,	-128(%r30)
-	std	  %r6,	-120(%r30)
-	std	  %r7,	-112(%r30)
-	std	  %r8,	-104(%r30)
-	std	  %r9,	 -96(%r30)
-	std	 %r10,	 -88(%r30)
-	std	 %r11,	 -80(%r30)
-	std	 %r12,	 -72(%r30)
-	std	 %r13,	 -64(%r30)
-	std	 %r14,	 -56(%r30)
-	std	 %r15,	 -48(%r30)
-	std	 %r16,	 -40(%r30)
-	std	 %r17,	 -32(%r30)
-	std	 %r18,	 -24(%r30)
-	std	  %r3,	 -16(%r30)
-	.endm
-
-	.macro	callee_rest
-	ldd	 -16(%r30),    %r3
-	ldd	 -24(%r30),   %r18
-	ldd	 -32(%r30),   %r17
-	ldd	 -40(%r30),   %r16
-	ldd	 -48(%r30),   %r15
-	ldd	 -56(%r30),   %r14
-	ldd	 -64(%r30),   %r13
-	ldd	 -72(%r30),   %r12
-	ldd	 -80(%r30),   %r11
-	ldd	 -88(%r30),   %r10
-	ldd	 -96(%r30),    %r9
-	ldd	-104(%r30),    %r8
-	ldd	-112(%r30),    %r7
-	ldd	-120(%r30),    %r6
-	ldd	-128(%r30),    %r5
-	ldd	-136(%r30),    %r4
-	mtctl	%r3, %cr27
-	ldd,mb	-CALLEE_REG_FRAME_SIZE(%r30),    %r3
-	.endm
-
-#else /* ! CONFIG_64BIT */
-
-	.macro	callee_save
-	stw,ma	 %r3,	CALLEE_REG_FRAME_SIZE(%r30)
-	mfctl	 %cr27, %r3
-	stw	 %r4,	-124(%r30)
-	stw	 %r5,	-120(%r30)
-	stw	 %r6,	-116(%r30)
-	stw	 %r7,	-112(%r30)
-	stw	 %r8,	-108(%r30)
-	stw	 %r9,	-104(%r30)
-	stw	 %r10,	-100(%r30)
-	stw	 %r11,	 -96(%r30)
-	stw	 %r12,	 -92(%r30)
-	stw	 %r13,	 -88(%r30)
-	stw	 %r14,	 -84(%r30)
-	stw	 %r15,	 -80(%r30)
-	stw	 %r16,	 -76(%r30)
-	stw	 %r17,	 -72(%r30)
-	stw	 %r18,	 -68(%r30)
-	stw	  %r3,	 -64(%r30)
-	.endm
-
-	.macro	callee_rest
-	ldw	 -64(%r30),    %r3
-	ldw	 -68(%r30),   %r18
-	ldw	 -72(%r30),   %r17
-	ldw	 -76(%r30),   %r16
-	ldw	 -80(%r30),   %r15
-	ldw	 -84(%r30),   %r14
-	ldw	 -88(%r30),   %r13
-	ldw	 -92(%r30),   %r12
-	ldw	 -96(%r30),   %r11
-	ldw	-100(%r30),   %r10
-	ldw	-104(%r30),   %r9
-	ldw	-108(%r30),   %r8
-	ldw	-112(%r30),   %r7
-	ldw	-116(%r30),   %r6
-	ldw	-120(%r30),   %r5
-	ldw	-124(%r30),   %r4
-	mtctl	%r3, %cr27
-	ldw,mb	-CALLEE_REG_FRAME_SIZE(%r30),   %r3
-	.endm
-#endif /* ! CONFIG_64BIT */
-
-	.macro	save_specials	regs
-
-	SAVE_SP  (%sr0, PT_SR0 (\regs))
-	SAVE_SP  (%sr1, PT_SR1 (\regs))
-	SAVE_SP  (%sr2, PT_SR2 (\regs))
-	SAVE_SP  (%sr3, PT_SR3 (\regs))
-	SAVE_SP  (%sr4, PT_SR4 (\regs))
-	SAVE_SP  (%sr5, PT_SR5 (\regs))
-	SAVE_SP  (%sr6, PT_SR6 (\regs))
-	SAVE_SP  (%sr7, PT_SR7 (\regs))
-
-	SAVE_CR  (%cr17, PT_IASQ0(\regs))
-	mtctl	 %r0,	%cr17
-	SAVE_CR  (%cr17, PT_IASQ1(\regs))
-
-	SAVE_CR  (%cr18, PT_IAOQ0(\regs))
-	mtctl	 %r0,	%cr18
-	SAVE_CR  (%cr18, PT_IAOQ1(\regs))
-
-#ifdef CONFIG_64BIT
-	/* cr11 (sar) is a funny one.  5 bits on PA1.1 and 6 bit on PA2.0
-	 * For PA2.0 mtsar or mtctl always write 6 bits, but mfctl only
-	 * reads 5 bits.  Use mfctl,w to read all six bits.  Otherwise
-	 * we lose the 6th bit on a save/restore over interrupt.
-	 */
-	mfctl,w  %cr11, %r1
-	STREG    %r1, PT_SAR (\regs)
-#else
-	SAVE_CR  (%cr11, PT_SAR  (\regs))
-#endif
-	SAVE_CR  (%cr19, PT_IIR  (\regs))
-
-	/*
-	 * Code immediately following this macro (in intr_save) relies
-	 * on r8 containing ipsw.
-	 */
-	mfctl    %cr22, %r8
-	STREG    %r8,   PT_PSW(\regs)
-	.endm
-
-	.macro	rest_specials	regs
-
-	REST_SP  (%sr0, PT_SR0 (\regs))
-	REST_SP  (%sr1, PT_SR1 (\regs))
-	REST_SP  (%sr2, PT_SR2 (\regs))
-	REST_SP  (%sr3, PT_SR3 (\regs))
-	REST_SP  (%sr4, PT_SR4 (\regs))
-	REST_SP  (%sr5, PT_SR5 (\regs))
-	REST_SP  (%sr6, PT_SR6 (\regs))
-	REST_SP  (%sr7, PT_SR7 (\regs))
-
-	REST_CR	(%cr17, PT_IASQ0(\regs))
-	REST_CR	(%cr17, PT_IASQ1(\regs))
-
-	REST_CR	(%cr18, PT_IAOQ0(\regs))
-	REST_CR	(%cr18, PT_IAOQ1(\regs))
-
-	REST_CR (%cr11, PT_SAR	(\regs))
-
-	REST_CR	(%cr22, PT_PSW	(\regs))
-	.endm
-
-
-	/* First step to create a "relied upon translation"
-	 * See PA 2.0 Arch. page F-4 and F-5.
-	 *
-	 * The ssm was originally necessary due to a "PCxT bug".
-	 * But someone decided it needed to be added to the architecture
-	 * and this "feature" went into rev3 of PA-RISC 1.1 Arch Manual.
-	 * It's been carried forward into PA 2.0 Arch as well. :^(
-	 *
-	 * "ssm 0,%r0" is a NOP with side effects (prefetch barrier).
-	 * rsm/ssm prevents the ifetch unit from speculatively fetching
-	 * instructions past this line in the code stream.
-	 * PA 2.0 processor will single step all insn in the same QUAD (4 insn).
-	 */
-	.macro	pcxt_ssm_bug
-	rsm	PSW_SM_I,%r0
-	nop	/* 1 */
-	nop	/* 2 */
-	nop	/* 3 */
-	nop	/* 4 */
-	nop	/* 5 */
-	nop	/* 6 */
-	nop	/* 7 */
-	.endm
-
-#endif /* __ASSEMBLY__ */
-#endif
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
deleted file mode 100644
index 57fcc4a5ebb4..000000000000
--- a/include/asm-parisc/atomic.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/* Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>
- * Copyright (C) 2006 Kyle McMartin <kyle@parisc-linux.org>
- */
-
-#ifndef _ASM_PARISC_ATOMIC_H_
-#define _ASM_PARISC_ATOMIC_H_
-
-#include <linux/types.h>
-#include <asm/system.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- *
- * And probably incredibly slow on parisc.  OTOH, we don't
- * have to write any serious assembly.   prumpf
- */
-
-#ifdef CONFIG_SMP
-#include <asm/spinlock.h>
-#include <asm/cache.h>		/* we use L1_CACHE_BYTES */
-
-/* Use an array of spinlocks for our atomic_ts.
- * Hash function to index into a different SPINLOCK.
- * Since "a" is usually an address, use one spinlock per cacheline.
- */
-#  define ATOMIC_HASH_SIZE 4
-#  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
-
-extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
-
-/* Can't use raw_spin_lock_irq because of #include problems, so
- * this is the substitute */
-#define _atomic_spin_lock_irqsave(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);		\
-	local_irq_save(f);			\
-	__raw_spin_lock(s);			\
-} while(0)
-
-#define _atomic_spin_unlock_irqrestore(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);			\
-	__raw_spin_unlock(s);				\
-	local_irq_restore(f);				\
-} while(0)
-
-
-#else
-#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
-#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
-#endif
-
-/* This should get optimized out since it's never called.
-** Or get a link error if xchg is used "wrong".
-*/
-extern void __xchg_called_with_bad_pointer(void);
-
-
-/* __xchg32/64 defined in arch/parisc/lib/bitops.c */
-extern unsigned long __xchg8(char, char *);
-extern unsigned long __xchg32(int, int *);
-#ifdef CONFIG_64BIT
-extern unsigned long __xchg64(unsigned long, unsigned long *);
-#endif
-
-/* optimizer better get rid of switch since size is a constant */
-static __inline__ unsigned long
-__xchg(unsigned long x, __volatile__ void * ptr, int size)
-{
-	switch(size) {
-#ifdef CONFIG_64BIT
-	case 8: return __xchg64(x,(unsigned long *) ptr);
-#endif
-	case 4: return __xchg32((int) x, (int *) ptr);
-	case 1: return __xchg8((char) x, (char *) ptr);
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-
-/*
-** REVISIT - Abandoned use of LDCW in xchg() for now:
-** o need to test sizeof(*ptr) to avoid clearing adjacent bytes
-** o and while we are at it, could CONFIG_64BIT code use LDCD too?
-**
-**	if (__builtin_constant_p(x) && (x == NULL))
-**		if (((unsigned long)p & 0xf) == 0)
-**			return __ldcw(p);
-*/
-#define xchg(ptr,x) \
-	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-
-#define __HAVE_ARCH_CMPXCHG	1
-
-/* bug catcher for when unsupported size is used - won't link */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-/* __cmpxchg_u32/u64 defined in arch/parisc/lib/bitops.c */
-extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_);
-extern unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new_);
-
-/* don't worry...optimizer will get rid of most of this */
-static __inline__ unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
-{
-	switch(size) {
-#ifdef CONFIG_64BIT
-	case 8: return __cmpxchg_u64((unsigned long *)ptr, old, new_);
-#endif
-	case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int) old, (unsigned int) new_);
-	}
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
-#define cmpxchg(ptr,o,n)						 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
-
-#include <asm-generic/cmpxchg-local.h>
-
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
-				      unsigned long old,
-				      unsigned long new_, int size)
-{
-	switch (size) {
-#ifdef CONFIG_64BIT
-	case 8:	return __cmpxchg_u64((unsigned long *)ptr, old, new_);
-#endif
-	case 4:	return __cmpxchg_u32(ptr, old, new_);
-	default:
-		return __cmpxchg_local_generic(ptr, old, new_, size);
-	}
-}
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)				  	\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
-			(unsigned long)(n), sizeof(*(ptr))))
-#ifdef CONFIG_64BIT
-#define cmpxchg64_local(ptr, o, n)					\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
-  })
-#else
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-#endif
-
-/* Note that we need not lock read accesses - aligned word writes/reads
- * are atomic, so a reader never sees unconsistent values.
- *
- * Cache-line alignment would conflict with, for example, linux/module.h
- */
-
-typedef struct { volatile int counter; } atomic_t;
-
-/* It's possible to reduce all atomic operations to either
- * __atomic_add_return, atomic_set and atomic_read (the latter
- * is there only for consistency).
- */
-
-static __inline__ int __atomic_add_return(int i, atomic_t *v)
-{
-	int ret;
-	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
-
-	ret = (v->counter += i);
-
-	_atomic_spin_unlock_irqrestore(v, flags);
-	return ret;
-}
-
-static __inline__ void atomic_set(atomic_t *v, int i) 
-{
-	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
-
-	v->counter = i;
-
-	_atomic_spin_unlock_irqrestore(v, flags);
-}
-
-static __inline__ int atomic_read(const atomic_t *v)
-{
-	return v->counter;
-}
-
-/* exported interface */
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_add(i,v)	((void)(__atomic_add_return( ((int)i),(v))))
-#define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)i),(v))))
-#define atomic_inc(v)	((void)(__atomic_add_return(   1,(v))))
-#define atomic_dec(v)	((void)(__atomic_add_return(  -1,(v))))
-
-#define atomic_add_return(i,v)	(__atomic_add_return( ((int)i),(v)))
-#define atomic_sub_return(i,v)	(__atomic_add_return(-((int)i),(v)))
-#define atomic_inc_return(v)	(__atomic_add_return(   1,(v)))
-#define atomic_dec_return(v)	(__atomic_add_return(  -1,(v)))
-
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_dec_and_test(v)	(atomic_dec_return(v) == 0)
-
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i),(v)) == 0)
-
-#define ATOMIC_INIT(i)	((atomic_t) { (i) })
-
-#define smp_mb__before_atomic_dec()	smp_mb()
-#define smp_mb__after_atomic_dec()	smp_mb()
-#define smp_mb__before_atomic_inc()	smp_mb()
-#define smp_mb__after_atomic_inc()	smp_mb()
-
-#ifdef CONFIG_64BIT
-
-typedef struct { volatile s64 counter; } atomic64_t;
-
-#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
-
-static __inline__ int
-__atomic64_add_return(s64 i, atomic64_t *v)
-{
-	int ret;
-	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
-
-	ret = (v->counter += i);
-
-	_atomic_spin_unlock_irqrestore(v, flags);
-	return ret;
-}
-
-static __inline__ void
-atomic64_set(atomic64_t *v, s64 i)
-{
-	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
-
-	v->counter = i;
-
-	_atomic_spin_unlock_irqrestore(v, flags);
-}
-
-static __inline__ s64
-atomic64_read(const atomic64_t *v)
-{
-	return v->counter;
-}
-
-#define atomic64_add(i,v)	((void)(__atomic64_add_return( ((s64)i),(v))))
-#define atomic64_sub(i,v)	((void)(__atomic64_add_return(-((s64)i),(v))))
-#define atomic64_inc(v)		((void)(__atomic64_add_return(   1,(v))))
-#define atomic64_dec(v)		((void)(__atomic64_add_return(  -1,(v))))
-
-#define atomic64_add_return(i,v)	(__atomic64_add_return( ((s64)i),(v)))
-#define atomic64_sub_return(i,v)	(__atomic64_add_return(-((s64)i),(v)))
-#define atomic64_inc_return(v)		(__atomic64_add_return(   1,(v)))
-#define atomic64_dec_return(v)		(__atomic64_add_return(  -1,(v)))
-
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
-
-#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
-#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i),(v)) == 0)
-
-/* exported interface */
-#define atomic64_cmpxchg(v, o, n) \
-	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
-#endif /* CONFIG_64BIT */
-
-#include <asm-generic/atomic.h>
-
-#endif /* _ASM_PARISC_ATOMIC_H_ */
diff --git a/include/asm-parisc/auxvec.h b/include/asm-parisc/auxvec.h
deleted file mode 100644
index 9c3ac4b89dc9..000000000000
--- a/include/asm-parisc/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASMPARISC_AUXVEC_H
-#define __ASMPARISC_AUXVEC_H
-
-#endif
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
deleted file mode 100644
index 7a6ea10bd231..000000000000
--- a/include/asm-parisc/bitops.h
+++ /dev/null
@@ -1,239 +0,0 @@
-#ifndef _PARISC_BITOPS_H
-#define _PARISC_BITOPS_H
-
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <linux/compiler.h>
-#include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-
-/*
- * HP-PARISC specific bit operations
- * for a detailed description of the functions please refer
- * to include/asm-i386/bitops.h or kerneldoc
- */
-
-#define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1))
-
-
-#define smp_mb__before_clear_bit()      smp_mb()
-#define smp_mb__after_clear_bit()       smp_mb()
-
-/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
- * on use of volatile and __*_bit() (set/clear/change):
- *	*_bit() want use of volatile.
- *	__*_bit() are "relaxed" and don't use spinlock or volatile.
- */
-
-static __inline__ void set_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
-	unsigned long flags;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	*addr |= mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-}
-
-static __inline__ void clear_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = ~(1UL << CHOP_SHIFTCOUNT(nr));
-	unsigned long flags;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	*addr &= mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-}
-
-static __inline__ void change_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
-	unsigned long flags;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	*addr ^= mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-}
-
-static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
-	unsigned long old;
-	unsigned long flags;
-	int set;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	old = *addr;
-	set = (old & mask) ? 1 : 0;
-	if (!set)
-		*addr = old | mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-
-	return set;
-}
-
-static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
-	unsigned long old;
-	unsigned long flags;
-	int set;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	old = *addr;
-	set = (old & mask) ? 1 : 0;
-	if (set)
-		*addr = old & ~mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-
-	return set;
-}
-
-static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
-{
-	unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
-	unsigned long oldbit;
-	unsigned long flags;
-
-	addr += (nr >> SHIFT_PER_LONG);
-	_atomic_spin_lock_irqsave(addr, flags);
-	oldbit = *addr;
-	*addr = oldbit ^ mask;
-	_atomic_spin_unlock_irqrestore(addr, flags);
-
-	return (oldbit & mask) ? 1 : 0;
-}
-
-#include <asm-generic/bitops/non-atomic.h>
-
-#ifdef __KERNEL__
-
-/**
- * __ffs - find first bit in word. returns 0 to "BITS_PER_LONG-1".
- * @word: The word to search
- *
- * __ffs() return is undefined if no bit is set.
- *
- * 32-bit fast __ffs by LaMont Jones "lamont At hp com".
- * 64-bit enhancement by Grant Grundler "grundler At parisc-linux org".
- * (with help from willy/jejb to get the semantics right)
- *
- * This algorithm avoids branches by making use of nullification.
- * One side effect of "extr" instructions is it sets PSW[N] bit.
- * How PSW[N] (nullify next insn) gets set is determined by the 
- * "condition" field (eg "<>" or "TR" below) in the extr* insn.
- * Only the 1st and one of either the 2cd or 3rd insn will get executed.
- * Each set of 3 insn will get executed in 2 cycles on PA8x00 vs 16 or so
- * cycles for each mispredicted branch.
- */
-
-static __inline__ unsigned long __ffs(unsigned long x)
-{
-	unsigned long ret;
-
-	__asm__(
-#ifdef CONFIG_64BIT
-		" ldi       63,%1\n"
-		" extrd,u,*<>  %0,63,32,%%r0\n"
-		" extrd,u,*TR  %0,31,32,%0\n"	/* move top 32-bits down */
-		" addi    -32,%1,%1\n"
-#else
-		" ldi       31,%1\n"
-#endif
-		" extru,<>  %0,31,16,%%r0\n"
-		" extru,TR  %0,15,16,%0\n"	/* xxxx0000 -> 0000xxxx */
-		" addi    -16,%1,%1\n"
-		" extru,<>  %0,31,8,%%r0\n"
-		" extru,TR  %0,23,8,%0\n"	/* 0000xx00 -> 000000xx */
-		" addi    -8,%1,%1\n"
-		" extru,<>  %0,31,4,%%r0\n"
-		" extru,TR  %0,27,4,%0\n"	/* 000000x0 -> 0000000x */
-		" addi    -4,%1,%1\n"
-		" extru,<>  %0,31,2,%%r0\n"
-		" extru,TR  %0,29,2,%0\n"	/* 0000000y, 1100b -> 0011b */
-		" addi    -2,%1,%1\n"
-		" extru,=  %0,31,1,%%r0\n"	/* check last bit */
-		" addi    -1,%1,%1\n"
-			: "+r" (x), "=r" (ret) );
-	return ret;
-}
-
-#include <asm-generic/bitops/ffz.h>
-
-/*
- * ffs: find first bit set. returns 1 to BITS_PER_LONG or 0 (if none set)
- * This is defined the same way as the libc and compiler builtin
- * ffs routines, therefore differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
-	return x ? (__ffs((unsigned long)x) + 1) : 0;
-}
-
-/*
- * fls: find last (most significant) bit set.
- * fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-
-static __inline__ int fls(int x)
-{
-	int ret;
-	if (!x)
-		return 0;
-
-	__asm__(
-	"	ldi		1,%1\n"
-	"	extru,<>	%0,15,16,%%r0\n"
-	"	zdep,TR		%0,15,16,%0\n"		/* xxxx0000 */
-	"	addi		16,%1,%1\n"
-	"	extru,<>	%0,7,8,%%r0\n"
-	"	zdep,TR		%0,23,24,%0\n"		/* xx000000 */
-	"	addi		8,%1,%1\n"
-	"	extru,<>	%0,3,4,%%r0\n"
-	"	zdep,TR		%0,27,28,%0\n"		/* x0000000 */
-	"	addi		4,%1,%1\n"
-	"	extru,<>	%0,1,2,%%r0\n"
-	"	zdep,TR		%0,29,30,%0\n"		/* y0000000 (y&3 = 0) */
-	"	addi		2,%1,%1\n"
-	"	extru,=		%0,0,1,%%r0\n"
-	"	addi		1,%1,%1\n"		/* if y & 8, add 1 */
-		: "+r" (x), "=r" (ret) );
-
-	return ret;
-}
-
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
-#include <asm-generic/bitops/sched.h>
-
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/find.h>
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-
-/* '3' is bits per byte */
-#define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3)
-
-#define ext2_set_bit_atomic(l,nr,addr) \
-		test_and_set_bit((nr)   ^ LE_BYTE_ADDR, (unsigned long *)addr)
-#define ext2_clear_bit_atomic(l,nr,addr) \
-		test_and_clear_bit( (nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
-
-#endif	/* __KERNEL__ */
-
-#include <asm-generic/bitops/minix-le.h>
-
-#endif /* _PARISC_BITOPS_H */
diff --git a/include/asm-parisc/bug.h b/include/asm-parisc/bug.h
deleted file mode 100644
index 8cfc553fc837..000000000000
--- a/include/asm-parisc/bug.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef _PARISC_BUG_H
-#define _PARISC_BUG_H
-
-/*
- * Tell the user there is some problem.
- * The offending file and line are encoded in the __bug_table section.
- */
-
-#ifdef CONFIG_BUG
-#define HAVE_ARCH_BUG
-#define HAVE_ARCH_WARN_ON
-
-/* the break instruction is used as BUG() marker.  */
-#define	PARISC_BUG_BREAK_ASM	"break 0x1f, 0x1fff"
-#define	PARISC_BUG_BREAK_INSN	0x03ffe01f  /* PARISC_BUG_BREAK_ASM */
-
-#if defined(CONFIG_64BIT)
-#define ASM_WORD_INSN		".dword\t"
-#else
-#define ASM_WORD_INSN		".word\t"
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define BUG()								\
-	do {								\
-		asm volatile("\n"					\
-			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
-			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
-			     "\t.short %c1, %c2\n"			\
-			     "\t.org 2b+%c3\n"				\
-			     "\t.popsection"				\
-			     : : "i" (__FILE__), "i" (__LINE__),	\
-			     "i" (0), "i" (sizeof(struct bug_entry)) ); \
-		for(;;) ;						\
-	} while(0)
-
-#else
-#define BUG()								\
-	do {								\
-		asm volatile(PARISC_BUG_BREAK_ASM : : );		\
-		for(;;) ;						\
-	} while(0)
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define __WARN()							\
-	do {								\
-		asm volatile("\n"					\
-			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
-			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
-			     "\t.short %c1, %c2\n"			\
-			     "\t.org 2b+%c3\n"				\
-			     "\t.popsection"				\
-			     : : "i" (__FILE__), "i" (__LINE__),	\
-			     "i" (BUGFLAG_WARNING),			\
-			     "i" (sizeof(struct bug_entry)) );		\
-	} while(0)
-#else
-#define __WARN()							\
-	do {								\
-		asm volatile("\n"					\
-			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
-			     "2:\t" ASM_WORD_INSN "1b\n"		\
-			     "\t.short %c0\n"				\
-			     "\t.org 2b+%c1\n"				\
-			     "\t.popsection"				\
-			     : : "i" (BUGFLAG_WARNING),			\
-			     "i" (sizeof(struct bug_entry)) );		\
-	} while(0)
-#endif
-
-
-#define WARN_ON(x) ({						\
-	int __ret_warn_on = !!(x);				\
-	if (__builtin_constant_p(__ret_warn_on)) {		\
-		if (__ret_warn_on)				\
-			__WARN();				\
-	} else {						\
-		if (unlikely(__ret_warn_on))			\
-			__WARN();				\
-	}							\
-	unlikely(__ret_warn_on);				\
-})
-
-#endif
-
-#include <asm-generic/bug.h>
-#endif
-
diff --git a/include/asm-parisc/bugs.h b/include/asm-parisc/bugs.h
deleted file mode 100644
index 9e6284342a5f..000000000000
--- a/include/asm-parisc/bugs.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  include/asm-parisc/bugs.h
- *
- *  Copyright (C) 1999	Mike Shaver
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-#include <asm/processor.h>
-
-static inline void check_bugs(void)
-{
-//	identify_cpu(&boot_cpu_data);
-}
diff --git a/include/asm-parisc/byteorder.h b/include/asm-parisc/byteorder.h
deleted file mode 100644
index db148313de5d..000000000000
--- a/include/asm-parisc/byteorder.h
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifndef _PARISC_BYTEORDER_H
-#define _PARISC_BYTEORDER_H
-
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#ifdef __GNUC__
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x)
-{
-	__asm__("dep %0, 15, 8, %0\n\t"		/* deposit 00ab -> 0bab */
-		"shd %%r0, %0, 8, %0"		/* shift 000000ab -> 00ba */
-		: "=r" (x)
-		: "0" (x));
-	return x;
-}
-
-static __inline__ __attribute_const__ __u32 ___arch__swab24(__u32 x)
-{
-	__asm__("shd %0, %0, 8, %0\n\t"		/* shift xabcxabc -> cxab */
-		"dep %0, 15, 8, %0\n\t"		/* deposit cxab -> cbab */
-		"shd %%r0, %0, 8, %0"		/* shift 0000cbab -> 0cba */
-		: "=r" (x)
-		: "0" (x));
-	return x;
-}
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
-	unsigned int temp;
-	__asm__("shd %0, %0, 16, %1\n\t"	/* shift abcdabcd -> cdab */
-		"dep %1, 15, 8, %1\n\t"		/* deposit cdab -> cbab */
-		"shd %0, %1, 8, %0"		/* shift abcdcbab -> dcba */
-		: "=r" (x), "=&r" (temp)
-		: "0" (x));
-	return x;
-}
-
-
-#if BITS_PER_LONG > 32
-/*
-** From "PA-RISC 2.0 Architecture", HP Professional Books.
-** See Appendix I page 8 , "Endian Byte Swapping".
-**
-** Pretty cool algorithm: (* == zero'd bits)
-**      PERMH   01234567 -> 67452301 into %0
-**      HSHL    67452301 -> 7*5*3*1* into %1
-**      HSHR    67452301 -> *6*4*2*0 into %0
-**      OR      %0 | %1  -> 76543210 into %0 (all done!)
-*/
-static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x) {
-	__u64 temp;
-	__asm__("permh,3210 %0, %0\n\t"
-		"hshl %0, 8, %1\n\t"
-		"hshr,u %0, 8, %0\n\t"
-		"or %1, %0, %0"
-		: "=r" (x), "=&r" (temp)
-		: "0" (x));
-	return x;
-}
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __BYTEORDER_HAS_U64__
-#elif !defined(__STRICT_ANSI__)
-static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x)
-{
-	__u32 t1 = ___arch__swab32((__u32) x);
-	__u32 t2 = ___arch__swab32((__u32) (x >> 32));
-	return (((__u64) t1 << 32) | t2);
-}
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __BYTEORDER_HAS_U64__
-#endif
-
-#define __arch__swab16(x) ___arch__swab16(x)
-#define __arch__swab24(x) ___arch__swab24(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/big_endian.h>
-
-#endif /* _PARISC_BYTEORDER_H */
diff --git a/include/asm-parisc/cache.h b/include/asm-parisc/cache.h
deleted file mode 100644
index 32c2cca74345..000000000000
--- a/include/asm-parisc/cache.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/asm-parisc/cache.h
- */
-
-#ifndef __ARCH_PARISC_CACHE_H
-#define __ARCH_PARISC_CACHE_H
-
-
-/*
- * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have
- * 32-byte cachelines.  The default configuration is not for SMP anyway,
- * so if you're building for SMP, you should select the appropriate
- * processor type.  There is a potential livelock danger when running
- * a machine with this value set too small, but it's more probable you'll
- * just ruin performance.
- */
-#ifdef CONFIG_PA20
-#define L1_CACHE_BYTES 64
-#define L1_CACHE_SHIFT 6
-#else
-#define L1_CACHE_BYTES 32
-#define L1_CACHE_SHIFT 5
-#endif
-
-#ifndef __ASSEMBLY__
-
-#define L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
-
-#define SMP_CACHE_BYTES L1_CACHE_BYTES
-
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
-
-void parisc_cache_init(void);	/* initializes cache-flushing */
-void disable_sr_hashing_asm(int); /* low level support for above */
-void disable_sr_hashing(void);   /* turns off space register hashing */
-void free_sid(unsigned long);
-unsigned long alloc_sid(void);
-
-struct seq_file;
-extern void show_cache_info(struct seq_file *m);
-
-extern int split_tlb;
-extern int dcache_stride;
-extern int icache_stride;
-extern struct pdc_cache_info cache_info;
-void parisc_setup_cache_timing(void);
-
-#define pdtlb(addr)         asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr));
-#define pitlb(addr)         asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr));
-#define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)" : : "r" (addr));
-
-#endif /* ! __ASSEMBLY__ */
-
-/* Classes of processor wrt: disabling space register hashing */
-
-#define SRHASH_PCXST    0   /* pcxs, pcxt, pcxt_ */
-#define SRHASH_PCXL     1   /* pcxl */
-#define SRHASH_PA20     2   /* pcxu, pcxu_, pcxw, pcxw_ */
-
-#endif
diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h
deleted file mode 100644
index b7ca6dc7fddc..000000000000
--- a/include/asm-parisc/cacheflush.h
+++ /dev/null
@@ -1,121 +0,0 @@
-#ifndef _PARISC_CACHEFLUSH_H
-#define _PARISC_CACHEFLUSH_H
-
-#include <linux/mm.h>
-
-/* The usual comment is "Caches aren't brain-dead on the <architecture>".
- * Unfortunately, that doesn't apply to PA-RISC. */
-
-/* Internal implementation */
-void flush_data_cache_local(void *);  /* flushes local data-cache only */
-void flush_instruction_cache_local(void *); /* flushes local code-cache only */
-#ifdef CONFIG_SMP
-void flush_data_cache(void); /* flushes data-cache only (all processors) */
-void flush_instruction_cache(void); /* flushes i-cache only (all processors) */
-#else
-#define flush_data_cache() flush_data_cache_local(NULL)
-#define flush_instruction_cache() flush_instruction_cache_local(NULL)
-#endif
-
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
-
-void flush_user_icache_range_asm(unsigned long, unsigned long);
-void flush_kernel_icache_range_asm(unsigned long, unsigned long);
-void flush_user_dcache_range_asm(unsigned long, unsigned long);
-void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
-void flush_kernel_dcache_page_asm(void *);
-void flush_kernel_icache_page(void *);
-void flush_user_dcache_page(unsigned long);
-void flush_user_icache_page(unsigned long);
-void flush_user_dcache_range(unsigned long, unsigned long);
-void flush_user_icache_range(unsigned long, unsigned long);
-
-/* Cache flush operations */
-
-void flush_cache_all_local(void);
-void flush_cache_all(void);
-void flush_cache_mm(struct mm_struct *mm);
-
-#define flush_kernel_dcache_range(start,size) \
-	flush_kernel_dcache_range_asm((start), (start)+(size));
-
-#define flush_cache_vmap(start, end)		flush_cache_all()
-#define flush_cache_vunmap(start, end)		flush_cache_all()
-
-extern void flush_dcache_page(struct page *page);
-
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
-
-#define flush_icache_page(vma,page)	do { 		\
-	flush_kernel_dcache_page(page);			\
-	flush_kernel_icache_page(page_address(page)); 	\
-} while (0)
-
-#define flush_icache_range(s,e)		do { 		\
-	flush_kernel_dcache_range_asm(s,e); 		\
-	flush_kernel_icache_range_asm(s,e); 		\
-} while (0)
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-	memcpy(dst, src, len); \
-	flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-	flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-	memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
-void flush_cache_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end);
-
-#define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-	if (PageAnon(page))
-		flush_user_dcache_page(vmaddr);
-}
-
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-void flush_kernel_dcache_page_addr(void *addr);
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-	flush_kernel_dcache_page_addr(page_address(page));
-}
-
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
-#ifdef CONFIG_PA8X00
-/* Only pa8800, pa8900 needs this */
-#define ARCH_HAS_KMAP
-
-void kunmap_parisc(void *addr);
-
-static inline void *kmap(struct page *page)
-{
-	might_sleep();
-	return page_address(page);
-}
-
-#define kunmap(page)			kunmap_parisc(page_address(page))
-
-#define kmap_atomic(page, idx)		page_address(page)
-
-#define kunmap_atomic(addr, idx)	kunmap_parisc(addr)
-
-#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
-#define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
-#endif
-
-#endif /* _PARISC_CACHEFLUSH_H */
-
diff --git a/include/asm-parisc/checksum.h b/include/asm-parisc/checksum.h
deleted file mode 100644
index e9639ccc3fce..000000000000
--- a/include/asm-parisc/checksum.h
+++ /dev/null
@@ -1,210 +0,0 @@
-#ifndef _PARISC_CHECKSUM_H
-#define _PARISC_CHECKSUM_H
-
-#include <linux/in6.h>
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-extern __wsum csum_partial(const void *, int, __wsum);
-
-/*
- * The same as csum_partial, but copies from src while it checksums.
- *
- * Here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum);
-
-/*
- * this is a new version of the above that records errors it finds in *errp,
- * but continues and zeros the rest of the buffer.
- */
-extern __wsum csum_partial_copy_from_user(const void __user *src,
-		void *dst, int len, __wsum sum, int *errp);
-
-/*
- *	Optimized for IP headers, which always checksum on 4 octet boundaries.
- *
- *	Written by Randolph Chung <tausq@debian.org>, and then mucked with by
- *	LaMont Jones <lamont@debian.org>
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	unsigned int sum;
-
-	__asm__ __volatile__ (
-"	ldws,ma		4(%1), %0\n"
-"	addib,<=	-4, %2, 2f\n"
-"\n"
-"	ldws		4(%1), %%r20\n"
-"	ldws		8(%1), %%r21\n"
-"	add		%0, %%r20, %0\n"
-"	ldws,ma		12(%1), %%r19\n"
-"	addc		%0, %%r21, %0\n"
-"	addc		%0, %%r19, %0\n"
-"1:	ldws,ma		4(%1), %%r19\n"
-"	addib,<		0, %2, 1b\n"
-"	addc		%0, %%r19, %0\n"
-"\n"
-"	extru		%0, 31, 16, %%r20\n"
-"	extru		%0, 15, 16, %%r21\n"
-"	addc		%%r20, %%r21, %0\n"
-"	extru		%0, 15, 16, %%r21\n"
-"	add		%0, %%r21, %0\n"
-"	subi		-1, %0, %0\n"
-"2:\n"
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
-	: "1" (iph), "2" (ihl)
-	: "r19", "r20", "r21", "memory");
-
-	return (__force __sum16)sum;
-}
-
-/*
- *	Fold a partial checksum
- */
-static inline __sum16 csum_fold(__wsum csum)
-{
-	u32 sum = (__force u32)csum;
-	/* add the swapped two 16-bit halves of sum,
-	   a possible carry from adding the two 16-bit halves,
-	   will carry from the lower half into the upper half,
-	   giving us the correct sum in the upper half. */
-	sum += (sum << 16) + (sum >> 16);
-	return (__force __sum16)(~sum >> 16);
-}
- 
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-					       unsigned short len,
-					       unsigned short proto,
-					       __wsum sum)
-{
-	__asm__(
-	"	add  %1, %0, %0\n"
-	"	addc %2, %0, %0\n"
-	"	addc %3, %0, %0\n"
-	"	addc %%r0, %0, %0\n"
-		: "=r" (sum)
-		: "r" (daddr), "r"(saddr), "r"(proto+len), "0"(sum));
-	return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-static inline __sum16 ip_compute_csum(const void *buf, int len)
-{
-	 return csum_fold (csum_partial(buf, len, 0));
-}
-
-
-#define _HAVE_ARCH_IPV6_CSUM
-static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
-					  const struct in6_addr *daddr,
-					  __u32 len, unsigned short proto,
-					  __wsum sum)
-{
-	__asm__ __volatile__ (
-
-#if BITS_PER_LONG > 32
-
-	/*
-	** We can execute two loads and two adds per cycle on PA 8000.
-	** But add insn's get serialized waiting for the carry bit.
-	** Try to keep 4 registers with "live" values ahead of the ALU.
-	*/
-
-"	ldd,ma		8(%1), %%r19\n"	/* get 1st saddr word */
-"	ldd,ma		8(%2), %%r20\n"	/* get 1st daddr word */
-"	add		%8, %3, %3\n"/* add 16-bit proto + len */
-"	add		%%r19, %0, %0\n"
-"	ldd,ma		8(%1), %%r21\n"	/* 2cd saddr */
-"	ldd,ma		8(%2), %%r22\n"	/* 2cd daddr */
-"	add,dc		%%r20, %0, %0\n"
-"	add,dc		%%r21, %0, %0\n"
-"	add,dc		%%r22, %0, %0\n"
-"	add,dc		%3, %0, %0\n"  /* fold in proto+len | carry bit */
-"	extrd,u		%0, 31, 32, %%r19\n"	/* copy upper half down */
-"	depdi		0, 31, 32, %0\n"	/* clear upper half */
-"	add		%%r19, %0, %0\n"	/* fold into 32-bits */
-"	addc		0, %0, %0\n"		/* add carry */
-
-#else
-
-	/*
-	** For PA 1.x, the insn order doesn't matter as much.
-	** Insn stream is serialized on the carry bit here too.
-	** result from the previous operation (eg r0 + x)
-	*/
-
-"	ldw,ma		4(%1), %%r19\n"	/* get 1st saddr word */
-"	ldw,ma		4(%2), %%r20\n"	/* get 1st daddr word */
-"	add		%8, %3, %3\n"	/* add 16-bit proto + len */
-"	add		%%r19, %0, %0\n"
-"	ldw,ma		4(%1), %%r21\n"	/* 2cd saddr */
-"	addc		%%r20, %0, %0\n"
-"	ldw,ma		4(%2), %%r22\n"	/* 2cd daddr */
-"	addc		%%r21, %0, %0\n"
-"	ldw,ma		4(%1), %%r19\n"	/* 3rd saddr */
-"	addc		%%r22, %0, %0\n"
-"	ldw,ma		4(%2), %%r20\n"	/* 3rd daddr */
-"	addc		%%r19, %0, %0\n"
-"	ldw,ma		4(%1), %%r21\n"	/* 4th saddr */
-"	addc		%%r20, %0, %0\n"
-"	ldw,ma		4(%2), %%r22\n"	/* 4th daddr */
-"	addc		%%r21, %0, %0\n"
-"	addc		%%r22, %0, %0\n"
-"	addc		%3, %0, %0\n"	/* fold in proto+len, catch carry */
-
-#endif
-	: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
-	: "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
-	: "r19", "r20", "r21", "r22");
-	return csum_fold(sum);
-}
-
-/* 
- *	Copy and checksum to user
- */
-#define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src,
-						      void __user *dst,
-						      int len, __wsum sum,
-						      int *err_ptr)
-{
-	/* code stolen from include/asm-mips64 */
-	sum = csum_partial(src, len, sum);
-	 
-	if (copy_to_user(dst, src, len)) {
-		*err_ptr = -EFAULT;
-		return (__force __wsum)-1;
-	}
-
-	return sum;
-}
-
-#endif
-
diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h
deleted file mode 100644
index 7f32611a7a5e..000000000000
--- a/include/asm-parisc/compat.h
+++ /dev/null
@@ -1,165 +0,0 @@
-#ifndef _ASM_PARISC_COMPAT_H
-#define _ASM_PARISC_COMPAT_H
-/*
- * Architecture specific compatibility types
- */
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/thread_info.h>
-
-#define COMPAT_USER_HZ 100
-
-typedef u32	compat_size_t;
-typedef s32	compat_ssize_t;
-typedef s32	compat_time_t;
-typedef s32	compat_clock_t;
-typedef s32	compat_pid_t;
-typedef u32	__compat_uid_t;
-typedef u32	__compat_gid_t;
-typedef u32	__compat_uid32_t;
-typedef u32	__compat_gid32_t;
-typedef u16	compat_mode_t;
-typedef u32	compat_ino_t;
-typedef u32	compat_dev_t;
-typedef s32	compat_off_t;
-typedef s64	compat_loff_t;
-typedef u16	compat_nlink_t;
-typedef u16	compat_ipc_pid_t;
-typedef s32	compat_daddr_t;
-typedef u32	compat_caddr_t;
-typedef s32	compat_timer_t;
-
-typedef s32	compat_int_t;
-typedef s32	compat_long_t;
-typedef s64	compat_s64;
-typedef u32	compat_uint_t;
-typedef u32	compat_ulong_t;
-typedef u64	compat_u64;
-
-struct compat_timespec {
-	compat_time_t		tv_sec;
-	s32			tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t		tv_sec;
-	s32			tv_usec;
-};
-
-struct compat_stat {
-	compat_dev_t		st_dev;	/* dev_t is 32 bits on parisc */
-	compat_ino_t		st_ino;	/* 32 bits */
-	compat_mode_t		st_mode;	/* 16 bits */
-	compat_nlink_t  	st_nlink;	/* 16 bits */
-	u16			st_reserved1;	/* old st_uid */
-	u16			st_reserved2;	/* old st_gid */
-	compat_dev_t		st_rdev;
-	compat_off_t		st_size;
-	compat_time_t		st_atime;
-	u32			st_atime_nsec;
-	compat_time_t		st_mtime;
-	u32			st_mtime_nsec;
-	compat_time_t		st_ctime;
-	u32			st_ctime_nsec;
-	s32			st_blksize;
-	s32			st_blocks;
-	u32			__unused1;	/* ACL stuff */
-	compat_dev_t		__unused2;	/* network */
-	compat_ino_t		__unused3;	/* network */
-	u32			__unused4;	/* cnodes */
-	u16			__unused5;	/* netsite */
-	short			st_fstype;
-	compat_dev_t		st_realdev;
-	u16			st_basemode;
-	u16			st_spareshort;
-	__compat_uid32_t	st_uid;
-	__compat_gid32_t	st_gid;
-	u32			st_spare4[3];
-};
-
-struct compat_flock {
-	short			l_type;
-	short			l_whence;
-	compat_off_t		l_start;
-	compat_off_t		l_len;
-	compat_pid_t		l_pid;
-};
-
-struct compat_flock64 {
-	short			l_type;
-	short			l_whence;
-	compat_loff_t		l_start;
-	compat_loff_t		l_len;
-	compat_pid_t		l_pid;
-};
-
-struct compat_statfs {
-	s32		f_type;
-	s32		f_bsize;
-	s32		f_blocks;
-	s32		f_bfree;
-	s32		f_bavail;
-	s32		f_files;
-	s32		f_ffree;
-	__kernel_fsid_t	f_fsid;
-	s32		f_namelen;
-	s32		f_frsize;
-	s32		f_spare[5];
-};
-
-struct compat_sigcontext {
-	compat_int_t sc_flags;
-	compat_int_t sc_gr[32]; /* PSW in sc_gr[0] */
-	u64 sc_fr[32];
-	compat_int_t sc_iasq[2];
-	compat_int_t sc_iaoq[2];
-	compat_int_t sc_sar; /* cr11 */
-};
-
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-typedef u32		compat_old_sigset_t;	/* at least 32 bits */
-
-#define _COMPAT_NSIG		64
-#define _COMPAT_NSIG_BPW	32
-
-typedef u32		compat_sigset_word;
-
-#define COMPAT_OFF_T_MAX	0x7fffffff
-#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
-
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-typedef	u32		compat_uptr_t;
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
-static __inline__ void __user *compat_alloc_user_space(long len)
-{
-	struct pt_regs *regs = &current->thread.regs;
-	return (void __user *)regs->gr[30];
-}
-
-static inline int __is_compat_task(struct task_struct *t)
-{
-	return test_ti_thread_flag(task_thread_info(t), TIF_32BIT);
-}
-
-static inline int is_compat_task(void)
-{
-	return __is_compat_task(current);
-}
-
-#endif /* _ASM_PARISC_COMPAT_H */
diff --git a/include/asm-parisc/compat_rt_sigframe.h b/include/asm-parisc/compat_rt_sigframe.h
deleted file mode 100644
index 81bec28bdc48..000000000000
--- a/include/asm-parisc/compat_rt_sigframe.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#include<linux/compat.h>
-#include<linux/compat_siginfo.h>
-#include<asm/compat_ucontext.h>
-
-#ifndef _ASM_PARISC_COMPAT_RT_SIGFRAME_H
-#define _ASM_PARISC_COMPAT_RT_SIGFRAME_H
-
-/* In a deft move of uber-hackery, we decide to carry the top half of all
- * 64-bit registers in a non-portable, non-ABI, hidden structure.
- * Userspace can read the hidden structure if it *wants* but is never
- * guaranteed to be in the same place. Infact the uc_sigmask from the 
- * ucontext_t structure may push the hidden register file downards
- */
-struct compat_regfile {
-	/* Upper half of all the 64-bit registers that were truncated
-	   on a copy to a 32-bit userspace */
-	compat_int_t rf_gr[32];
-	compat_int_t rf_iasq[2];
-	compat_int_t rf_iaoq[2];
-	compat_int_t rf_sar;
-};
-
-#define COMPAT_SIGRETURN_TRAMP 4
-#define COMPAT_SIGRESTARTBLOCK_TRAMP 5 
-#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + COMPAT_SIGRESTARTBLOCK_TRAMP)
-
-struct compat_rt_sigframe {
-	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
-	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
-		trampoline we left on the stack (we were bad and didn't 
-		change sp so we could run really fast.) */
-	compat_uint_t tramp[COMPAT_TRAMP_SIZE];
-	compat_siginfo_t info;
-	struct compat_ucontext uc;
-	/* Hidden location of truncated registers, *must* be last. */
-	struct compat_regfile regs; 
-};
-
-/*
- * The 32-bit ABI wants at least 48 bytes for a function call frame:
- * 16 bytes for arg0-arg3, and 32 bytes for magic (the only part of
- * which Linux/parisc uses is sp-20 for the saved return pointer...)
- * Then, the stack pointer must be rounded to a cache line (64 bytes).
- */
-#define SIGFRAME32		64
-#define FUNCTIONCALLFRAME32	48
-#define PARISC_RT_SIGFRAME_SIZE32					\
-	(((sizeof(struct compat_rt_sigframe) + FUNCTIONCALLFRAME32) + SIGFRAME32) & -SIGFRAME32)
-
-#endif
diff --git a/include/asm-parisc/compat_signal.h b/include/asm-parisc/compat_signal.h
deleted file mode 100644
index 6ad02c360b21..000000000000
--- a/include/asm-parisc/compat_signal.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* Use generic */
-#include <asm-generic/compat_signal.h>
diff --git a/include/asm-parisc/compat_ucontext.h b/include/asm-parisc/compat_ucontext.h
deleted file mode 100644
index 2f7292afde3c..000000000000
--- a/include/asm-parisc/compat_ucontext.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_PARISC_COMPAT_UCONTEXT_H
-#define _ASM_PARISC_COMPAT_UCONTEXT_H
-
-#include <linux/compat.h>
-
-/* 32-bit ucontext as seen from an 64-bit kernel */
-struct compat_ucontext {
-	compat_uint_t uc_flags;
-	compat_uptr_t uc_link;
-	compat_stack_t uc_stack;	/* struct compat_sigaltstack (12 bytes)*/	
-	/* FIXME: Pad out to get uc_mcontext to start at an 8-byte aligned boundary */
-	compat_uint_t pad[1];
-	struct compat_sigcontext uc_mcontext;
-	compat_sigset_t uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif /* !_ASM_PARISC_COMPAT_UCONTEXT_H */
diff --git a/include/asm-parisc/cputime.h b/include/asm-parisc/cputime.h
deleted file mode 100644
index dcdf2fbd7e72..000000000000
--- a/include/asm-parisc/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PARISC_CPUTIME_H
-#define __PARISC_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __PARISC_CPUTIME_H */
diff --git a/include/asm-parisc/current.h b/include/asm-parisc/current.h
deleted file mode 100644
index 0fb9338e3bf2..000000000000
--- a/include/asm-parisc/current.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _PARISC_CURRENT_H
-#define _PARISC_CURRENT_H
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct * get_current(void)
-{
-	return current_thread_info()->task;
-}
- 
-#define current get_current()
-
-#endif /* !(_PARISC_CURRENT_H) */
diff --git a/include/asm-parisc/delay.h b/include/asm-parisc/delay.h
deleted file mode 100644
index 7a75e984674b..000000000000
--- a/include/asm-parisc/delay.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _PARISC_DELAY_H
-#define _PARISC_DELAY_H
-
-#include <asm/system.h>    /* for mfctl() */
-#include <asm/processor.h> /* for boot_cpu_data */
-
-
-/*
- * Copyright (C) 1993 Linus Torvalds
- *
- * Delay routines
- */
-
-static __inline__ void __delay(unsigned long loops) {
-	asm volatile(
-	"	.balignl	64,0x34000034\n"
-	"	addib,UV -1,%0,.\n"
-	"	nop\n"
-		: "=r" (loops) : "0" (loops));
-}
-
-static __inline__ void __cr16_delay(unsigned long clocks) {
-	unsigned long start;
-
-	/*
-	 * Note: Due to unsigned math, cr16 rollovers shouldn't be
-	 * a problem here. However, on 32 bit, we need to make sure
-	 * we don't pass in too big a value. The current default
-	 * value of MAX_UDELAY_MS should help prevent this.
-	 */
-
-	start = mfctl(16);
-	while ((mfctl(16) - start) < clocks)
-	    ;
-}
-
-static __inline__ void __udelay(unsigned long usecs) {
-	__cr16_delay(usecs * ((unsigned long)boot_cpu_data.cpu_hz / 1000000UL));
-}
-
-#define udelay(n) __udelay(n)
-
-#endif /* defined(_PARISC_DELAY_H) */
diff --git a/include/asm-parisc/device.h b/include/asm-parisc/device.h
deleted file mode 100644
index d8f9872b0e2d..000000000000
--- a/include/asm-parisc/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/include/asm-parisc/div64.h b/include/asm-parisc/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/include/asm-parisc/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h
deleted file mode 100644
index 53af696f23d2..000000000000
--- a/include/asm-parisc/dma-mapping.h
+++ /dev/null
@@ -1,253 +0,0 @@
-#ifndef _PARISC_DMA_MAPPING_H
-#define _PARISC_DMA_MAPPING_H
-
-#include <linux/mm.h>
-#include <asm/cacheflush.h>
-#include <asm/scatterlist.h>
-
-/* See Documentation/DMA-mapping.txt */
-struct hppa_dma_ops {
-	int  (*dma_supported)(struct device *dev, u64 mask);
-	void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
-	void *(*alloc_noncoherent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
-	void (*free_consistent)(struct device *dev, size_t size, void *vaddr, dma_addr_t iova);
-	dma_addr_t (*map_single)(struct device *dev, void *addr, size_t size, enum dma_data_direction direction);
-	void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction);
-	int  (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction);
-	void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nhwents, enum dma_data_direction direction);
-	void (*dma_sync_single_for_cpu)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
-	void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
-	void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
-	void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
-};
-
-/*
-** We could live without the hppa_dma_ops indirection if we didn't want
-** to support 4 different coherent dma models with one binary (they will
-** someday be loadable modules):
-**     I/O MMU        consistent method           dma_sync behavior
-**  =============   ======================       =======================
-**  a) PA-7x00LC    uncachable host memory          flush/purge
-**  b) U2/Uturn      cachable host memory              NOP
-**  c) Ike/Astro     cachable host memory              NOP
-**  d) EPIC/SAGA     memory on EPIC/SAGA         flush/reset DMA channel
-**
-** PA-7[13]00LC processors have a GSC bus interface and no I/O MMU.
-**
-** Systems (eg PCX-T workstations) that don't fall into the above
-** categories will need to modify the needed drivers to perform
-** flush/purge and allocate "regular" cacheable pages for everything.
-*/
-
-#ifdef CONFIG_PA11
-extern struct hppa_dma_ops pcxl_dma_ops;
-extern struct hppa_dma_ops pcx_dma_ops;
-#endif
-
-extern struct hppa_dma_ops *hppa_dma_ops;
-
-static inline void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   gfp_t flag)
-{
-	return hppa_dma_ops->alloc_consistent(dev, size, dma_handle, flag);
-}
-
-static inline void *
-dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		      gfp_t flag)
-{
-	return hppa_dma_ops->alloc_noncoherent(dev, size, dma_handle, flag);
-}
-
-static inline void
-dma_free_coherent(struct device *dev, size_t size, 
-		    void *vaddr, dma_addr_t dma_handle)
-{
-	hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle);
-}
-
-static inline void
-dma_free_noncoherent(struct device *dev, size_t size, 
-		    void *vaddr, dma_addr_t dma_handle)
-{
-	hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle);
-}
-
-static inline dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-	       enum dma_data_direction direction)
-{
-	return hppa_dma_ops->map_single(dev, ptr, size, direction);
-}
-
-static inline void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction direction)
-{
-	hppa_dma_ops->unmap_single(dev, dma_addr, size, direction);
-}
-
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	   enum dma_data_direction direction)
-{
-	return hppa_dma_ops->map_sg(dev, sg, nents, direction);
-}
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-	     enum dma_data_direction direction)
-{
-	hppa_dma_ops->unmap_sg(dev, sg, nhwentries, direction);
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
-	     size_t size, enum dma_data_direction direction)
-{
-	return dma_map_single(dev, (page_address(page) + (offset)), size, direction);
-}
-
-static inline void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	       enum dma_data_direction direction)
-{
-	dma_unmap_single(dev, dma_address, size, direction);
-}
-
-
-static inline void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_single_for_cpu)
-		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, 0, size, direction);
-}
-
-static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_single_for_device)
-		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, 0, size, direction);
-}
-
-static inline void
-dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_single_for_cpu)
-		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, offset, size, direction);
-}
-
-static inline void
-dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_single_for_device)
-		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, offset, size, direction);
-}
-
-static inline void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
-		 enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_sg_for_cpu)
-		hppa_dma_ops->dma_sync_sg_for_cpu(dev, sg, nelems, direction);
-}
-
-static inline void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
-		 enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_sg_for_device)
-		hppa_dma_ops->dma_sync_sg_for_device(dev, sg, nelems, direction);
-}
-
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
-	return hppa_dma_ops->dma_supported(dev, mask);
-}
-
-static inline int
-dma_set_mask(struct device *dev, u64 mask)
-{
-	if(!dev->dma_mask || !dma_supported(dev, mask))
-		return -EIO;
-
-	*dev->dma_mask = mask;
-
-	return 0;
-}
-
-static inline int
-dma_get_cache_alignment(void)
-{
-	return dcache_stride;
-}
-
-static inline int
-dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
-{
-	return (hppa_dma_ops->dma_sync_single_for_cpu == NULL);
-}
-
-static inline void
-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-	       enum dma_data_direction direction)
-{
-	if(hppa_dma_ops->dma_sync_single_for_cpu)
-		flush_kernel_dcache_range((unsigned long)vaddr, size);
-}
-
-static inline void *
-parisc_walk_tree(struct device *dev)
-{
-	struct device *otherdev;
-	if(likely(dev->platform_data != NULL))
-		return dev->platform_data;
-	/* OK, just traverse the bus to find it */
-	for(otherdev = dev->parent; otherdev;
-	    otherdev = otherdev->parent) {
-		if(otherdev->platform_data) {
-			dev->platform_data = otherdev->platform_data;
-			break;
-		}
-	}
-	BUG_ON(!dev->platform_data);
-	return dev->platform_data;
-}
-		
-#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu);	
-	
-
-#ifdef CONFIG_IOMMU_CCIO
-struct parisc_device;
-struct ioc;
-void * ccio_get_iommu(const struct parisc_device *dev);
-int ccio_request_resource(const struct parisc_device *dev,
-		struct resource *res);
-int ccio_allocate_resource(const struct parisc_device *dev,
-		struct resource *res, unsigned long size,
-		unsigned long min, unsigned long max, unsigned long align);
-#else /* !CONFIG_IOMMU_CCIO */
-#define ccio_get_iommu(dev) NULL
-#define ccio_request_resource(dev, res) insert_resource(&iomem_resource, res)
-#define ccio_allocate_resource(dev, res, size, min, max, align) \
-		allocate_resource(&iomem_resource, res, size, min, max, \
-				align, NULL, NULL)
-#endif /* !CONFIG_IOMMU_CCIO */
-
-#ifdef CONFIG_IOMMU_SBA
-struct parisc_device;
-void * sba_get_iommu(struct parisc_device *dev);
-#endif
-
-/* At the moment, we panic on error for IOMMU resource exaustion */
-#define dma_mapping_error(dev, x)	0
-
-#endif
diff --git a/include/asm-parisc/dma.h b/include/asm-parisc/dma.h
deleted file mode 100644
index 31ad0f05af3d..000000000000
--- a/include/asm-parisc/dma.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/* $Id: dma.h,v 1.2 1999/04/27 00:46:18 deller Exp $
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- * (c) Copyright 2000, Grant Grundler
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <asm/io.h>		/* need byte IO */
-#include <asm/system.h>	
-
-#define dma_outb	outb
-#define dma_inb		inb
-
-/*
-** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
-** (or rather not merge) DMAs into manageable chunks.
-** On parisc, this is more of the software/tuning constraint
-** rather than the HW. I/O MMU allocation algorithms can be
-** faster with smaller sizes (to some degree).
-*/
-#define DMA_CHUNK_SIZE	(BITS_PER_LONG*PAGE_SIZE)
-
-/* The maximum address that we can perform a DMA transfer to on this platform
-** New dynamic DMA interfaces should obsolete this....
-*/
-#define MAX_DMA_ADDRESS (~0UL)
-
-/*
-** We don't have DMA channels... well V-class does but the
-** Dynamic DMA Mapping interface will support them... right? :^)
-** Note: this is not relevant right now for PA-RISC, but we cannot 
-** leave this as undefined because some things (e.g. sound)
-** won't compile :-(
-*/
-#define MAX_DMA_CHANNELS 8
-#define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0	/* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT	0x10
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG		0x08	/* command register (w) */
-#define DMA1_STAT_REG		0x08	/* status register (r) */
-#define DMA1_REQ_REG            0x09    /* request register (w) */
-#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
-#define DMA1_MODE_REG		0x0B	/* mode register (w) */
-#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
-#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
-#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
-#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
-#define DMA1_EXT_MODE_REG	(0x400 | DMA1_MODE_REG)
-
-#define DMA2_CMD_REG		0xD0	/* command register (w) */
-#define DMA2_STAT_REG		0xD0	/* status register (r) */
-#define DMA2_REQ_REG            0xD2    /* request register (w) */
-#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
-#define DMA2_MODE_REG		0xD6	/* mode register (w) */
-#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
-#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
-#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
-#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
-#define DMA2_EXT_MODE_REG	(0x400 | DMA2_MODE_REG)
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
-	return 0;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
-	unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
-					 : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
-	/* using short to get 16-bit wrap around */
-	unsigned short count;
-
-	count = 1 + dma_inb(io_port);
-	count += dma_inb(io_port) << 8;
-	
-	return (dmanr<=3)? count : (count<<1);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
-#ifdef CONFIG_SUPERIO
-	if (dmanr<=3)
-		dma_outb(dmanr,  DMA1_MASK_REG);
-	else
-		dma_outb(dmanr & 3,  DMA2_MASK_REG);
-#endif
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
-#ifdef CONFIG_SUPERIO
-	if (dmanr<=3)
-		dma_outb(dmanr | 4,  DMA1_MASK_REG);
-	else
-		dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
-#endif
-}
-
-/* reserve a DMA channel */
-#define request_dma(dmanr, device_id)	(0)
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
-}
-
-
-/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
-}
-
-
-#define free_dma(dmanr)
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy 	(0)
-#endif
-
-#endif /* _ASM_DMA_H */
diff --git a/include/asm-parisc/eisa_bus.h b/include/asm-parisc/eisa_bus.h
deleted file mode 100644
index 201085f83dd5..000000000000
--- a/include/asm-parisc/eisa_bus.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * eisa_bus.h interface between the eisa BA driver and the bus enumerator
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Copyright (c) 2002 Daniel Engstrom <5116@telia.com>
- *
- */
-
-#ifndef ASM_EISA_H
-#define ASM_EISA_H
-
-extern void eisa_make_irq_level(int num);
-extern void eisa_make_irq_edge(int num);
-extern int eisa_enumerator(unsigned long eeprom_addr,
-			   struct resource *io_parent, 
-			   struct resource *mem_parent);
-extern int eisa_eeprom_init(unsigned long addr);
-
-#endif
diff --git a/include/asm-parisc/eisa_eeprom.h b/include/asm-parisc/eisa_eeprom.h
deleted file mode 100644
index 9c9da980402a..000000000000
--- a/include/asm-parisc/eisa_eeprom.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * eisa_eeprom.h - provide support for EISA adapters in PA-RISC machines
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Copyright (c) 2001, 2002 Daniel Engstrom <5116@telia.com>
- *
- */
-
-#ifndef ASM_EISA_EEPROM_H
-#define ASM_EISA_EEPROM_H
-
-extern void __iomem *eisa_eeprom_addr;
-
-#define HPEE_MAX_LENGTH       0x2000	/* maximum eeprom length */
-
-#define HPEE_SLOT_INFO(slot) (20+(48*slot))
-
-struct eeprom_header 
-{
-   
-	u_int32_t num_writes;       /* number of writes */
- 	u_int8_t  flags;            /* flags, usage? */
-	u_int8_t  ver_maj;
-	u_int8_t  ver_min;
-	u_int8_t  num_slots;        /* number of EISA slots in system */
-	u_int16_t csum;             /* checksum, I don't know how to calulate this */
-	u_int8_t  pad[10];
-} __attribute__ ((packed));
-
-
-struct eeprom_eisa_slot_info
-{
-	u_int32_t eisa_slot_id;
-	u_int32_t config_data_offset;
-	u_int32_t num_writes;
-	u_int16_t csum;
-	u_int16_t num_functions;
-	u_int16_t config_data_length;
-	
-	/* bits 0..3 are the duplicate slot id */ 
-#define HPEE_SLOT_INFO_EMBEDDED  0x10
-#define HPEE_SLOT_INFO_VIRTUAL   0x20
-#define HPEE_SLOT_INFO_NO_READID 0x40
-#define HPEE_SLOT_INFO_DUPLICATE 0x80
-	u_int8_t slot_info;
-	
-#define HPEE_SLOT_FEATURES_ENABLE         0x01
-#define HPEE_SLOT_FEATURES_IOCHK          0x02
-#define HPEE_SLOT_FEATURES_CFG_INCOMPLETE 0x80
-	u_int8_t slot_features;
-	
-	u_int8_t  ver_min;
-	u_int8_t  ver_maj;
-	
-#define HPEE_FUNCTION_INFO_HAVE_TYPE      0x01
-#define HPEE_FUNCTION_INFO_HAVE_MEMORY    0x02
-#define HPEE_FUNCTION_INFO_HAVE_IRQ       0x04
-#define HPEE_FUNCTION_INFO_HAVE_DMA       0x08
-#define HPEE_FUNCTION_INFO_HAVE_PORT      0x10
-#define HPEE_FUNCTION_INFO_HAVE_PORT_INIT 0x20
-/* I think there are two slighty different 
- * versions of the function_info field 
- * one int the fixed header and one optional 
- * in the parsed slot data area */
-#define HPEE_FUNCTION_INFO_HAVE_FUNCTION  0x01
-#define HPEE_FUNCTION_INFO_F_DISABLED     0x80
-#define HPEE_FUNCTION_INFO_CFG_FREE_FORM  0x40
-	u_int8_t  function_info;
-
-#define HPEE_FLAG_BOARD_IS_ISA		  0x01 /* flag and minor version for isa board */
-	u_int8_t  flags;
-	u_int8_t  pad[24];
-} __attribute__ ((packed));
-
-
-#define HPEE_MEMORY_MAX_ENT   9
-/* memory descriptor: byte 0 */
-#define HPEE_MEMORY_WRITABLE  0x01
-#define HPEE_MEMORY_CACHABLE  0x02
-#define HPEE_MEMORY_TYPE_MASK 0x18
-#define HPEE_MEMORY_TYPE_SYS  0x00
-#define HPEE_MEMORY_TYPE_EXP  0x08
-#define HPEE_MEMORY_TYPE_VIR  0x10
-#define HPEE_MEMORY_TYPE_OTH  0x18
-#define HPEE_MEMORY_SHARED    0x20
-#define HPEE_MEMORY_MORE      0x80
-
-/* memory descriptor: byte 1 */
-#define HPEE_MEMORY_WIDTH_MASK 0x03
-#define HPEE_MEMORY_WIDTH_BYTE 0x00
-#define HPEE_MEMORY_WIDTH_WORD 0x01
-#define HPEE_MEMORY_WIDTH_DWORD 0x02
-#define HPEE_MEMORY_DECODE_MASK 0x0c
-#define HPEE_MEMORY_DECODE_20BITS 0x00
-#define HPEE_MEMORY_DECODE_24BITS 0x04
-#define HPEE_MEMORY_DECODE_32BITS 0x08
-/* byte 2 and 3 are a 16bit LE value
- * containging the memory size in kilobytes */
-/* byte 4,5,6 are a 24bit LE value
- * containing the memory base address */
-
-
-#define HPEE_IRQ_MAX_ENT      7
-/* Interrupt entry: byte 0 */
-#define HPEE_IRQ_CHANNEL_MASK 0xf
-#define HPEE_IRQ_TRIG_LEVEL   0x20
-#define HPEE_IRQ_MORE         0x80
-/* byte 1 seems to be unused */
-
-#define HPEE_DMA_MAX_ENT     4
-
-/* dma entry: byte 0 */
-#define HPEE_DMA_CHANNEL_MASK 7
-#define HPEE_DMA_SIZE_MASK	0xc
-#define HPEE_DMA_SIZE_BYTE	0x0
-#define HPEE_DMA_SIZE_WORD	0x4
-#define HPEE_DMA_SIZE_DWORD	0x8
-#define HPEE_DMA_SHARED      0x40
-#define HPEE_DMA_MORE        0x80
-
-/* dma entry: byte 1 */
-#define HPEE_DMA_TIMING_MASK 0x30
-#define HPEE_DMA_TIMING_ISA	0x0
-#define HPEE_DMA_TIMING_TYPEA 0x10
-#define HPEE_DMA_TIMING_TYPEB 0x20
-#define HPEE_DMA_TIMING_TYPEC 0x30
-
-#define HPEE_PORT_MAX_ENT 20
-/* port entry byte 0 */
-#define HPEE_PORT_SIZE_MASK 0x1f
-#define HPEE_PORT_SHARED    0x40
-#define HPEE_PORT_MORE      0x80
-/* byte 1 and 2 is a 16bit LE value
- * conating the start port number */
-
-#define HPEE_PORT_INIT_MAX_LEN     60 /* in bytes here */
-/* port init entry byte 0 */
-#define HPEE_PORT_INIT_WIDTH_MASK  0x3
-#define HPEE_PORT_INIT_WIDTH_BYTE  0x0
-#define HPEE_PORT_INIT_WIDTH_WORD  0x1
-#define HPEE_PORT_INIT_WIDTH_DWORD 0x2
-#define HPEE_PORT_INIT_MASK        0x4
-#define HPEE_PORT_INIT_MORE        0x80
-
-#define HPEE_SELECTION_MAX_ENT 26
-
-#define HPEE_TYPE_MAX_LEN    80
-
-#endif
diff --git a/include/asm-parisc/elf.h b/include/asm-parisc/elf.h
deleted file mode 100644
index d0a4a8262818..000000000000
--- a/include/asm-parisc/elf.h
+++ /dev/null
@@ -1,342 +0,0 @@
-#ifndef __ASMPARISC_ELF_H
-#define __ASMPARISC_ELF_H
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-
-#define EM_PARISC 15
-
-/* HPPA specific definitions.  */
-
-/* Legal values for e_flags field of Elf32_Ehdr.  */
-
-#define EF_PARISC_TRAPNIL	0x00010000 /* Trap nil pointer dereference.  */
-#define EF_PARISC_EXT		0x00020000 /* Program uses arch. extensions. */
-#define EF_PARISC_LSB		0x00040000 /* Program expects little endian. */
-#define EF_PARISC_WIDE		0x00080000 /* Program expects wide mode.  */
-#define EF_PARISC_NO_KABP	0x00100000 /* No kernel assisted branch
-					      prediction.  */
-#define EF_PARISC_LAZYSWAP	0x00400000 /* Allow lazy swapping.  */
-#define EF_PARISC_ARCH		0x0000ffff /* Architecture version.  */
-
-/* Defined values for `e_flags & EF_PARISC_ARCH' are:  */
-
-#define EFA_PARISC_1_0		    0x020b /* PA-RISC 1.0 big-endian.  */
-#define EFA_PARISC_1_1		    0x0210 /* PA-RISC 1.1 big-endian.  */
-#define EFA_PARISC_2_0		    0x0214 /* PA-RISC 2.0 big-endian.  */
-
-/* Additional section indices.  */
-
-#define SHN_PARISC_ANSI_COMMON	0xff00	   /* Section for tenatively declared
-					      symbols in ANSI C.  */
-#define SHN_PARISC_HUGE_COMMON	0xff01	   /* Common blocks in huge model.  */
-
-/* Legal values for sh_type field of Elf32_Shdr.  */
-
-#define SHT_PARISC_EXT		0x70000000 /* Contains product specific ext. */
-#define SHT_PARISC_UNWIND	0x70000001 /* Unwind information.  */
-#define SHT_PARISC_DOC		0x70000002 /* Debug info for optimized code. */
-
-/* Legal values for sh_flags field of Elf32_Shdr.  */
-
-#define SHF_PARISC_SHORT	0x20000000 /* Section with short addressing. */
-#define SHF_PARISC_HUGE		0x40000000 /* Section far from gp.  */
-#define SHF_PARISC_SBP		0x80000000 /* Static branch prediction code. */
-
-/* Legal values for ST_TYPE subfield of st_info (symbol type).  */
-
-#define STT_PARISC_MILLICODE	13	/* Millicode function entry point.  */
-
-#define STT_HP_OPAQUE		(STT_LOOS + 0x1)
-#define STT_HP_STUB		(STT_LOOS + 0x2)
-
-/* HPPA relocs.  */
-
-#define R_PARISC_NONE		0	/* No reloc.  */
-#define R_PARISC_DIR32		1	/* Direct 32-bit reference.  */
-#define R_PARISC_DIR21L		2	/* Left 21 bits of eff. address.  */
-#define R_PARISC_DIR17R		3	/* Right 17 bits of eff. address.  */
-#define R_PARISC_DIR17F		4	/* 17 bits of eff. address.  */
-#define R_PARISC_DIR14R		6	/* Right 14 bits of eff. address.  */
-#define R_PARISC_PCREL32	9	/* 32-bit rel. address.  */
-#define R_PARISC_PCREL21L	10	/* Left 21 bits of rel. address.  */
-#define R_PARISC_PCREL17R	11	/* Right 17 bits of rel. address.  */
-#define R_PARISC_PCREL17F	12	/* 17 bits of rel. address.  */
-#define R_PARISC_PCREL14R	14	/* Right 14 bits of rel. address.  */
-#define R_PARISC_DPREL21L	18	/* Left 21 bits of rel. address.  */
-#define R_PARISC_DPREL14R	22	/* Right 14 bits of rel. address.  */
-#define R_PARISC_GPREL21L	26	/* GP-relative, left 21 bits.  */
-#define R_PARISC_GPREL14R	30	/* GP-relative, right 14 bits.  */
-#define R_PARISC_LTOFF21L	34	/* LT-relative, left 21 bits.  */
-#define R_PARISC_LTOFF14R	38	/* LT-relative, right 14 bits.  */
-#define R_PARISC_SECREL32	41	/* 32 bits section rel. address.  */
-#define R_PARISC_SEGBASE	48	/* No relocation, set segment base.  */
-#define R_PARISC_SEGREL32	49	/* 32 bits segment rel. address.  */
-#define R_PARISC_PLTOFF21L	50	/* PLT rel. address, left 21 bits.  */
-#define R_PARISC_PLTOFF14R	54	/* PLT rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF_FPTR32	57	/* 32 bits LT-rel. function pointer. */
-#define R_PARISC_LTOFF_FPTR21L	58	/* LT-rel. fct ptr, left 21 bits. */
-#define R_PARISC_LTOFF_FPTR14R	62	/* LT-rel. fct ptr, right 14 bits. */
-#define R_PARISC_FPTR64		64	/* 64 bits function address.  */
-#define R_PARISC_PLABEL32	65	/* 32 bits function address.  */
-#define R_PARISC_PCREL64	72	/* 64 bits PC-rel. address.  */
-#define R_PARISC_PCREL22F	74	/* 22 bits PC-rel. address.  */
-#define R_PARISC_PCREL14WR	75	/* PC-rel. address, right 14 bits.  */
-#define R_PARISC_PCREL14DR	76	/* PC rel. address, right 14 bits.  */
-#define R_PARISC_PCREL16F	77	/* 16 bits PC-rel. address.  */
-#define R_PARISC_PCREL16WF	78	/* 16 bits PC-rel. address.  */
-#define R_PARISC_PCREL16DF	79	/* 16 bits PC-rel. address.  */
-#define R_PARISC_DIR64		80	/* 64 bits of eff. address.  */
-#define R_PARISC_DIR14WR	83	/* 14 bits of eff. address.  */
-#define R_PARISC_DIR14DR	84	/* 14 bits of eff. address.  */
-#define R_PARISC_DIR16F		85	/* 16 bits of eff. address.  */
-#define R_PARISC_DIR16WF	86	/* 16 bits of eff. address.  */
-#define R_PARISC_DIR16DF	87	/* 16 bits of eff. address.  */
-#define R_PARISC_GPREL64	88	/* 64 bits of GP-rel. address.  */
-#define R_PARISC_GPREL14WR	91	/* GP-rel. address, right 14 bits.  */
-#define R_PARISC_GPREL14DR	92	/* GP-rel. address, right 14 bits.  */
-#define R_PARISC_GPREL16F	93	/* 16 bits GP-rel. address.  */
-#define R_PARISC_GPREL16WF	94	/* 16 bits GP-rel. address.  */
-#define R_PARISC_GPREL16DF	95	/* 16 bits GP-rel. address.  */
-#define R_PARISC_LTOFF64	96	/* 64 bits LT-rel. address.  */
-#define R_PARISC_LTOFF14WR	99	/* LT-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF14DR	100	/* LT-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF16F	101	/* 16 bits LT-rel. address.  */
-#define R_PARISC_LTOFF16WF	102	/* 16 bits LT-rel. address.  */
-#define R_PARISC_LTOFF16DF	103	/* 16 bits LT-rel. address.  */
-#define R_PARISC_SECREL64	104	/* 64 bits section rel. address.  */
-#define R_PARISC_SEGREL64	112	/* 64 bits segment rel. address.  */
-#define R_PARISC_PLTOFF14WR	115	/* PLT-rel. address, right 14 bits.  */
-#define R_PARISC_PLTOFF14DR	116	/* PLT-rel. address, right 14 bits.  */
-#define R_PARISC_PLTOFF16F	117	/* 16 bits LT-rel. address.  */
-#define R_PARISC_PLTOFF16WF	118	/* 16 bits PLT-rel. address.  */
-#define R_PARISC_PLTOFF16DF	119	/* 16 bits PLT-rel. address.  */
-#define R_PARISC_LTOFF_FPTR64	120	/* 64 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR14WR	123	/* LT-rel. fct. ptr., right 14 bits. */
-#define R_PARISC_LTOFF_FPTR14DR	124	/* LT-rel. fct. ptr., right 14 bits. */
-#define R_PARISC_LTOFF_FPTR16F	125	/* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR16WF	126	/* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR16DF	127	/* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LORESERVE	128
-#define R_PARISC_COPY		128	/* Copy relocation.  */
-#define R_PARISC_IPLT		129	/* Dynamic reloc, imported PLT */
-#define R_PARISC_EPLT		130	/* Dynamic reloc, exported PLT */
-#define R_PARISC_TPREL32	153	/* 32 bits TP-rel. address.  */
-#define R_PARISC_TPREL21L	154	/* TP-rel. address, left 21 bits.  */
-#define R_PARISC_TPREL14R	158	/* TP-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF_TP21L	162	/* LT-TP-rel. address, left 21 bits. */
-#define R_PARISC_LTOFF_TP14R	166	/* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP14F	167	/* 14 bits LT-TP-rel. address.  */
-#define R_PARISC_TPREL64	216	/* 64 bits TP-rel. address.  */
-#define R_PARISC_TPREL14WR	219	/* TP-rel. address, right 14 bits.  */
-#define R_PARISC_TPREL14DR	220	/* TP-rel. address, right 14 bits.  */
-#define R_PARISC_TPREL16F	221	/* 16 bits TP-rel. address.  */
-#define R_PARISC_TPREL16WF	222	/* 16 bits TP-rel. address.  */
-#define R_PARISC_TPREL16DF	223	/* 16 bits TP-rel. address.  */
-#define R_PARISC_LTOFF_TP64	224	/* 64 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP14WR	227	/* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP14DR	228	/* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP16F	229	/* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP16WF	230	/* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP16DF	231	/* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_HIRESERVE	255
-
-#define PA_PLABEL_FDESC		0x02	/* bit set if PLABEL points to
-					 * a function descriptor, not
-					 * an address */
-
-/* The following are PA function descriptors 
- *
- * addr:	the absolute address of the function
- * gp:		either the data pointer (r27) for non-PIC code or the
- *		the PLT pointer (r19) for PIC code */
-
-/* Format for the Elf32 Function descriptor */
-typedef struct elf32_fdesc {
-	__u32	addr;
-	__u32	gp;
-} Elf32_Fdesc;
-
-/* Format for the Elf64 Function descriptor */
-typedef struct elf64_fdesc {
-	__u64	dummy[2]; /* FIXME: nothing uses these, why waste
-			   * the space */
-	__u64	addr;
-	__u64	gp;
-} Elf64_Fdesc;
-
-/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
-
-#define PT_HP_TLS		(PT_LOOS + 0x0)
-#define PT_HP_CORE_NONE		(PT_LOOS + 0x1)
-#define PT_HP_CORE_VERSION	(PT_LOOS + 0x2)
-#define PT_HP_CORE_KERNEL	(PT_LOOS + 0x3)
-#define PT_HP_CORE_COMM		(PT_LOOS + 0x4)
-#define PT_HP_CORE_PROC		(PT_LOOS + 0x5)
-#define PT_HP_CORE_LOADABLE	(PT_LOOS + 0x6)
-#define PT_HP_CORE_STACK	(PT_LOOS + 0x7)
-#define PT_HP_CORE_SHM		(PT_LOOS + 0x8)
-#define PT_HP_CORE_MMF		(PT_LOOS + 0x9)
-#define PT_HP_PARALLEL		(PT_LOOS + 0x10)
-#define PT_HP_FASTBIND		(PT_LOOS + 0x11)
-#define PT_HP_OPT_ANNOT		(PT_LOOS + 0x12)
-#define PT_HP_HSL_ANNOT		(PT_LOOS + 0x13)
-#define PT_HP_STACK		(PT_LOOS + 0x14)
-
-#define PT_PARISC_ARCHEXT	0x70000000
-#define PT_PARISC_UNWIND	0x70000001
-
-/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr.  */
-
-#define PF_PARISC_SBP		0x08000000
-
-#define PF_HP_PAGE_SIZE		0x00100000
-#define PF_HP_FAR_SHARED	0x00200000
-#define PF_HP_NEAR_SHARED	0x00400000
-#define PF_HP_CODE		0x01000000
-#define PF_HP_MODIFY		0x02000000
-#define PF_HP_LAZYSWAP		0x04000000
-#define PF_HP_SBP		0x08000000
-
-/*
- * The following definitions are those for 32-bit ELF binaries on a 32-bit
- * kernel and for 64-bit binaries on a 64-bit kernel.  To run 32-bit binaries
- * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these
- * macros appropriately and then #includes binfmt_elf.c, which then includes
- * this file.
- */
-#ifndef ELF_CLASS
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- *
- * Note that this header file is used by default in fs/binfmt_elf.c. So
- * the following macros are for the default case. However, for the 64
- * bit kernel we also support 32 bit parisc binaries. To do that
- * arch/parisc/kernel/binfmt_elf32.c defines its own set of these
- * macros, and then it includes fs/binfmt_elf.c to provide an alternate
- * elf binary handler for 32 bit binaries (on the 64 bit kernel).
- */
-#ifdef CONFIG_64BIT
-#define ELF_CLASS   ELFCLASS64
-#else
-#define ELF_CLASS	ELFCLASS32
-#endif
-
-typedef unsigned long elf_greg_t;
-
-/*
- * This yields a string that ld.so will use to load implementation
- * specific libraries for optimization.  This is more specific in
- * intent than poking at uname or /proc/cpuinfo.
- */
-
-#define ELF_PLATFORM  ("PARISC\0")
-
-#define SET_PERSONALITY(ex, ibcs2) \
-	current->personality = PER_LINUX; \
-	current->thread.map_base = DEFAULT_MAP_BASE; \
-	current->thread.task_size = DEFAULT_TASK_SIZE \
-
-/*
- * Fill in general registers in a core dump.  This saves pretty
- * much the same registers as hp-ux, although in a different order.
- * Registers marked # below are not currently saved in pt_regs, so
- * we use their current values here.
- *
- * 	gr0..gr31
- * 	sr0..sr7
- * 	iaoq0..iaoq1
- * 	iasq0..iasq1
- * 	cr11 (sar)
- * 	cr19 (iir)
- * 	cr20 (isr)
- * 	cr21 (ior)
- *  #	cr22 (ipsw)
- *  #	cr0 (recovery counter)
- *  #	cr24..cr31 (temporary registers)
- *  #	cr8,9,12,13 (protection IDs)
- *  #	cr10 (scr/ccr)
- *  #	cr15 (ext int enable mask)
- *
- */
-
-#define ELF_CORE_COPY_REGS(dst, pt)	\
-	memset(dst, 0, sizeof(dst));	/* don't leak any "random" bits */ \
-	memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \
-	memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \
-	memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \
-	memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \
-	dst[44] = pt->sar;   dst[45] = pt->iir; \
-	dst[46] = pt->isr;   dst[47] = pt->ior; \
-	dst[48] = mfctl(22); dst[49] = mfctl(0); \
-	dst[50] = mfctl(24); dst[51] = mfctl(25); \
-	dst[52] = mfctl(26); dst[53] = mfctl(27); \
-	dst[54] = mfctl(28); dst[55] = mfctl(29); \
-	dst[56] = mfctl(30); dst[57] = mfctl(31); \
-	dst[58] = mfctl( 8); dst[59] = mfctl( 9); \
-	dst[60] = mfctl(12); dst[61] = mfctl(13); \
-	dst[62] = mfctl(10); dst[63] = mfctl(15);
-
-#endif /* ! ELF_CLASS */
-
-#define ELF_NGREG 80	/* We only need 64 at present, but leave space
-			   for expansion. */
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-#define ELF_NFPREG 32
-typedef double elf_fpreg_t;
-typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
-
-struct task_struct;
-
-extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
-#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
-
-struct pt_regs;	/* forward declaration... */
-
-
-#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_DATA	ELFDATA2MSB
-#define ELF_ARCH	EM_PARISC
-#define ELF_OSABI 	ELFOSABI_LINUX
-
-/* %r23 is set by ld.so to a pointer to a function which might be 
-   registered using atexit.  This provides a means for the dynamic
-   linker to call DT_FINI functions for shared libraries that have
-   been loaded before the code runs.
-
-   So that we can use the same startup file with static executables,
-   we start programs with a value of 0 to indicate that there is no
-   such function.  */
-#define ELF_PLAT_INIT(_r, load_addr)       _r->gr[23] = 0
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE	4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.
-
-   (2 * TASK_SIZE / 3) turns into something undefined when run through a
-   32 bit preprocessor and in some cases results in the kernel trying to map
-   ld.so to the kernel virtual base. Use a sane value instead. /Jes 
-  */
-
-#define ELF_ET_DYN_BASE         (TASK_UNMAPPED_BASE + 0x01000000)
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this CPU supports.  This could be done in user space,
-   but it's not easy, and we've already done it here.  */
-
-#define ELF_HWCAP	0
-
-#endif
diff --git a/include/asm-parisc/emergency-restart.h b/include/asm-parisc/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/include/asm-parisc/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-parisc/errno.h b/include/asm-parisc/errno.h
deleted file mode 100644
index e2f3ddc796be..000000000000
--- a/include/asm-parisc/errno.h
+++ /dev/null
@@ -1,124 +0,0 @@
-#ifndef _PARISC_ERRNO_H
-#define _PARISC_ERRNO_H
-
-#include <asm-generic/errno-base.h>
-
-#define	ENOMSG		35	/* No message of desired type */
-#define	EIDRM		36	/* Identifier removed */
-#define	ECHRNG		37	/* Channel number out of range */
-#define	EL2NSYNC	38	/* Level 2 not synchronized */
-#define	EL3HLT		39	/* Level 3 halted */
-#define	EL3RST		40	/* Level 3 reset */
-#define	ELNRNG		41	/* Link number out of range */
-#define	EUNATCH		42	/* Protocol driver not attached */
-#define	ENOCSI		43	/* No CSI structure available */
-#define	EL2HLT		44	/* Level 2 halted */
-#define	EDEADLK		45	/* Resource deadlock would occur */
-#define	EDEADLOCK	EDEADLK
-#define	ENOLCK		46	/* No record locks available */
-#define	EILSEQ		47	/* Illegal byte sequence */
-
-#define	ENONET		50	/* Machine is not on the network */
-#define	ENODATA		51	/* No data available */
-#define	ETIME		52	/* Timer expired */
-#define	ENOSR		53	/* Out of streams resources */
-#define	ENOSTR		54	/* Device not a stream */
-#define	ENOPKG		55	/* Package not installed */
-
-#define	ENOLINK		57	/* Link has been severed */
-#define	EADV		58	/* Advertise error */
-#define	ESRMNT		59	/* Srmount error */
-#define	ECOMM		60	/* Communication error on send */
-#define	EPROTO		61	/* Protocol error */
-
-#define	EMULTIHOP	64	/* Multihop attempted */
-
-#define	EDOTDOT		66	/* RFS specific error */
-#define	EBADMSG		67	/* Not a data message */
-#define	EUSERS		68	/* Too many users */
-#define	EDQUOT		69	/* Quota exceeded */
-#define	ESTALE		70	/* Stale NFS file handle */
-#define	EREMOTE		71	/* Object is remote */
-#define	EOVERFLOW	72	/* Value too large for defined data type */
-
-/* these errnos are defined by Linux but not HPUX. */
-
-#define	EBADE		160	/* Invalid exchange */
-#define	EBADR		161	/* Invalid request descriptor */
-#define	EXFULL		162	/* Exchange full */
-#define	ENOANO		163	/* No anode */
-#define	EBADRQC		164	/* Invalid request code */
-#define	EBADSLT		165	/* Invalid slot */
-#define	EBFONT		166	/* Bad font file format */
-#define	ENOTUNIQ	167	/* Name not unique on network */
-#define	EBADFD		168	/* File descriptor in bad state */
-#define	EREMCHG		169	/* Remote address changed */
-#define	ELIBACC		170	/* Can not access a needed shared library */
-#define	ELIBBAD		171	/* Accessing a corrupted shared library */
-#define	ELIBSCN		172	/* .lib section in a.out corrupted */
-#define	ELIBMAX		173	/* Attempting to link in too many shared libraries */
-#define	ELIBEXEC	174	/* Cannot exec a shared library directly */
-#define	ERESTART	175	/* Interrupted system call should be restarted */
-#define	ESTRPIPE	176	/* Streams pipe error */
-#define	EUCLEAN		177	/* Structure needs cleaning */
-#define	ENOTNAM		178	/* Not a XENIX named type file */
-#define	ENAVAIL		179	/* No XENIX semaphores available */
-#define	EISNAM		180	/* Is a named type file */
-#define	EREMOTEIO	181	/* Remote I/O error */
-#define	ENOMEDIUM	182	/* No medium found */
-#define	EMEDIUMTYPE	183	/* Wrong medium type */
-#define	ENOKEY		184	/* Required key not available */
-#define	EKEYEXPIRED	185	/* Key has expired */
-#define	EKEYREVOKED	186	/* Key has been revoked */
-#define	EKEYREJECTED	187	/* Key was rejected by service */
-
-/* We now return you to your regularly scheduled HPUX. */
-
-#define ENOSYM		215	/* symbol does not exist in executable */
-#define	ENOTSOCK	216	/* Socket operation on non-socket */
-#define	EDESTADDRREQ	217	/* Destination address required */
-#define	EMSGSIZE	218	/* Message too long */
-#define	EPROTOTYPE	219	/* Protocol wrong type for socket */
-#define	ENOPROTOOPT	220	/* Protocol not available */
-#define	EPROTONOSUPPORT	221	/* Protocol not supported */
-#define	ESOCKTNOSUPPORT	222	/* Socket type not supported */
-#define	EOPNOTSUPP	223	/* Operation not supported on transport endpoint */
-#define	EPFNOSUPPORT	224	/* Protocol family not supported */
-#define	EAFNOSUPPORT	225	/* Address family not supported by protocol */
-#define	EADDRINUSE	226	/* Address already in use */
-#define	EADDRNOTAVAIL	227	/* Cannot assign requested address */
-#define	ENETDOWN	228	/* Network is down */
-#define	ENETUNREACH	229	/* Network is unreachable */
-#define	ENETRESET	230	/* Network dropped connection because of reset */
-#define	ECONNABORTED	231	/* Software caused connection abort */
-#define	ECONNRESET	232	/* Connection reset by peer */
-#define	ENOBUFS		233	/* No buffer space available */
-#define	EISCONN		234	/* Transport endpoint is already connected */
-#define	ENOTCONN	235	/* Transport endpoint is not connected */
-#define	ESHUTDOWN	236	/* Cannot send after transport endpoint shutdown */
-#define	ETOOMANYREFS	237	/* Too many references: cannot splice */
-#define EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
-#define	ETIMEDOUT	238	/* Connection timed out */
-#define	ECONNREFUSED	239	/* Connection refused */
-#define EREMOTERELEASE	240	/* Remote peer released connection */
-#define	EHOSTDOWN	241	/* Host is down */
-#define	EHOSTUNREACH	242	/* No route to host */
-
-#define	EALREADY	244	/* Operation already in progress */
-#define	EINPROGRESS	245	/* Operation now in progress */
-#define	EWOULDBLOCK	246	/* Operation would block (Linux returns EAGAIN) */
-#define	ENOTEMPTY	247	/* Directory not empty */
-#define	ENAMETOOLONG	248	/* File name too long */
-#define	ELOOP		249	/* Too many symbolic links encountered */
-#define	ENOSYS		251	/* Function not implemented */
-
-#define ENOTSUP		252	/* Function not implemented (POSIX.4 / HPUX) */
-#define ECANCELLED	253	/* aio request was canceled before complete (POSIX.4 / HPUX) */
-#define ECANCELED	ECANCELLED	/* SuSv3 and Solaris wants one 'L' */
-
-/* for robust mutexes */
-#define EOWNERDEAD	254	/* Owner died */
-#define ENOTRECOVERABLE	255	/* State not recoverable */
-
-
-#endif
diff --git a/include/asm-parisc/fb.h b/include/asm-parisc/fb.h
deleted file mode 100644
index 4d503a023ab2..000000000000
--- a/include/asm-parisc/fb.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
-				unsigned long off)
-{
-	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/include/asm-parisc/fcntl.h b/include/asm-parisc/fcntl.h
deleted file mode 100644
index 1e1c824764ee..000000000000
--- a/include/asm-parisc/fcntl.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _PARISC_FCNTL_H
-#define _PARISC_FCNTL_H
-
-/* open/fcntl - O_SYNC is only implemented on blocks devices and on files
-   located on an ext2 file system */
-#define O_APPEND	000000010
-#define O_BLKSEEK	000000100 /* HPUX only */
-#define O_CREAT		000000400 /* not fcntl */
-#define O_EXCL		000002000 /* not fcntl */
-#define O_LARGEFILE	000004000
-#define O_SYNC		000100000
-#define O_NONBLOCK	000200004 /* HPUX has separate NDELAY & NONBLOCK */
-#define O_NOCTTY	000400000 /* not fcntl */
-#define O_DSYNC		001000000 /* HPUX only */
-#define O_RSYNC		002000000 /* HPUX only */
-#define O_NOATIME	004000000
-#define O_CLOEXEC	010000000 /* set close_on_exec */
-
-#define O_DIRECTORY	000010000 /* must be a directory */
-#define O_NOFOLLOW	000000200 /* don't follow links */
-#define O_INVISIBLE	004000000 /* invisible I/O, for DMAPI/XDSM */
-
-#define F_GETLK64	8
-#define F_SETLK64	9
-#define F_SETLKW64	10
-
-#define F_GETOWN	11	/*  for sockets. */
-#define F_SETOWN	12	/*  for sockets. */
-#define F_SETSIG	13	/*  for sockets. */
-#define F_GETSIG	14	/*  for sockets. */
-
-/* for posix fcntl() and lockf() */
-#define F_RDLCK		01
-#define F_WRLCK		02
-#define F_UNLCK		03
-
-#include <asm-generic/fcntl.h>
-
-#endif
diff --git a/include/asm-parisc/fixmap.h b/include/asm-parisc/fixmap.h
deleted file mode 100644
index de3fe3a18229..000000000000
--- a/include/asm-parisc/fixmap.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
-
-/*
- * This file defines the locations of the fixed mappings on parisc.
- *
- * All of the values in this file are machine virtual addresses.
- *
- * All of the values in this file must be <4GB (because of assembly
- * loading restrictions).  If you place this region anywhere above
- * __PAGE_OFFSET, you must adjust the memory map accordingly */
-
-/* The alias region is used in kernel space to do copy/clear to or
- * from areas congruently mapped with user space.  It is 8MB large
- * and must be 16MB aligned */
-#define TMPALIAS_MAP_START	((__PAGE_OFFSET) - 16*1024*1024)
-/* This is the kernel area for all maps (vmalloc, dma etc.)  most
- * usually, it extends up to TMPALIAS_MAP_START.  Virtual addresses
- * 0..GATEWAY_PAGE_SIZE are reserved for the gateway page */
-#define KERNEL_MAP_START	(GATEWAY_PAGE_SIZE)
-#define KERNEL_MAP_END		(TMPALIAS_MAP_START)
-
-#ifndef __ASSEMBLY__
-extern void *vmalloc_start;
-#define PCXL_DMA_MAP_SIZE	(8*1024*1024)
-#define VMALLOC_START		((unsigned long)vmalloc_start)
-#define VMALLOC_END		(KERNEL_MAP_END)
-#endif /*__ASSEMBLY__*/
-
-#endif /*_ASM_FIXMAP_H*/
diff --git a/include/asm-parisc/floppy.h b/include/asm-parisc/floppy.h
deleted file mode 100644
index 4ca69f558fae..000000000000
--- a/include/asm-parisc/floppy.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*    Architecture specific parts of the Floppy driver
- *
- *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *    Copyright (C) 2000 Matthew Wilcox (willy a debian . org)
- *    Copyright (C) 2000 Dave Kennedy
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_PARISC_FLOPPY_H
-#define __ASM_PARISC_FLOPPY_H
-
-#include <linux/vmalloc.h>
-
-
-/*
- * The DMA channel used by the floppy controller cannot access data at
- * addresses >= 16MB
- *
- * Went back to the 1MB limit, as some people had problems with the floppy
- * driver otherwise. It doesn't matter much for performance anyway, as most
- * floppy accesses go through the track buffer.
- */
-#define _CROSS_64KB(a,s,vdma) \
-(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
-
-#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
-
-
-#define SW fd_routine[use_virtual_dma&1]
-#define CSW fd_routine[can_use_virtual_dma & 1]
-
-
-#define fd_inb(port)			readb(port)
-#define fd_outb(value, port)		writeb(value, port)
-
-#define fd_request_dma()        CSW._request_dma(FLOPPY_DMA,"floppy")
-#define fd_free_dma()           CSW._free_dma(FLOPPY_DMA)
-#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
-#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
-#define fd_free_irq()		free_irq(FLOPPY_IRQ, NULL)
-#define fd_get_dma_residue()    SW._get_dma_residue(FLOPPY_DMA)
-#define fd_dma_mem_alloc(size)	SW._dma_mem_alloc(size)
-#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io)
-
-#define FLOPPY_CAN_FALLBACK_ON_NODMA
-
-static int virtual_dma_count=0;
-static int virtual_dma_residue=0;
-static char *virtual_dma_addr=0;
-static int virtual_dma_mode=0;
-static int doing_pdma=0;
-
-static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
-{
-	register unsigned char st;
-
-#undef TRACE_FLPY_INT
-
-#ifdef TRACE_FLPY_INT
-	static int calls=0;
-	static int bytes=0;
-	static int dma_wait=0;
-#endif
-	if (!doing_pdma) {
-		floppy_interrupt(irq, dev_id, regs);
-		return;
-	}
-
-#ifdef TRACE_FLPY_INT
-	if(!calls)
-		bytes = virtual_dma_count;
-#endif
-
-	{
-		register int lcount;
-		register char *lptr = virtual_dma_addr;
-
-		for (lcount = virtual_dma_count; lcount; lcount--) {
-			st = fd_inb(virtual_dma_port+4) & 0xa0 ;
-			if (st != 0xa0) 
-				break;
-			if (virtual_dma_mode) {
-				fd_outb(*lptr, virtual_dma_port+5);
-			} else {
-				*lptr = fd_inb(virtual_dma_port+5);
-			}
-			lptr++;
-		}
-		virtual_dma_count = lcount;
-		virtual_dma_addr = lptr;
-		st = fd_inb(virtual_dma_port+4);
-	}
-
-#ifdef TRACE_FLPY_INT
-	calls++;
-#endif
-	if (st == 0x20)
-		return;
-	if (!(st & 0x20)) {
-		virtual_dma_residue += virtual_dma_count;
-		virtual_dma_count = 0;
-#ifdef TRACE_FLPY_INT
-		printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", 
-		       virtual_dma_count, virtual_dma_residue, calls, bytes,
-		       dma_wait);
-		calls = 0;
-		dma_wait=0;
-#endif
-		doing_pdma = 0;
-		floppy_interrupt(irq, dev_id, regs);
-		return;
-	}
-#ifdef TRACE_FLPY_INT
-	if (!virtual_dma_count)
-		dma_wait++;
-#endif
-}
-
-static void fd_disable_dma(void)
-{
-	if(! (can_use_virtual_dma & 1))
-		disable_dma(FLOPPY_DMA);
-	doing_pdma = 0;
-	virtual_dma_residue += virtual_dma_count;
-	virtual_dma_count=0;
-}
-
-static int vdma_request_dma(unsigned int dmanr, const char * device_id)
-{
-	return 0;
-}
-
-static void vdma_nop(unsigned int dummy)
-{
-}
-
-
-static int vdma_get_dma_residue(unsigned int dummy)
-{
-	return virtual_dma_count + virtual_dma_residue;
-}
-
-
-static int fd_request_irq(void)
-{
-	if(can_use_virtual_dma)
-		return request_irq(FLOPPY_IRQ, floppy_hardint,
-				   IRQF_DISABLED, "floppy", NULL);
-	else
-		return request_irq(FLOPPY_IRQ, floppy_interrupt,
-				   IRQF_DISABLED, "floppy", NULL);
-}
-
-static unsigned long dma_mem_alloc(unsigned long size)
-{
-	return __get_dma_pages(GFP_KERNEL, get_order(size));
-}
-
-
-static unsigned long vdma_mem_alloc(unsigned long size)
-{
-	return (unsigned long) vmalloc(size);
-
-}
-
-#define nodma_mem_alloc(size) vdma_mem_alloc(size)
-
-static void _fd_dma_mem_free(unsigned long addr, unsigned long size)
-{
-	if((unsigned int) addr >= (unsigned int) high_memory)
-		return vfree((void *)addr);
-	else
-		free_pages(addr, get_order(size));		
-}
-
-#define fd_dma_mem_free(addr, size)  _fd_dma_mem_free(addr, size) 
-
-static void _fd_chose_dma_mode(char *addr, unsigned long size)
-{
-	if(can_use_virtual_dma == 2) {
-		if((unsigned int) addr >= (unsigned int) high_memory ||
-		   virt_to_bus(addr) >= 0x1000000 ||
-		   _CROSS_64KB(addr, size, 0))
-			use_virtual_dma = 1;
-		else
-			use_virtual_dma = 0;
-	} else {
-		use_virtual_dma = can_use_virtual_dma & 1;
-	}
-}
-
-#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size)
-
-
-static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
-{
-	doing_pdma = 1;
-	virtual_dma_port = io;
-	virtual_dma_mode = (mode  == DMA_MODE_WRITE);
-	virtual_dma_addr = addr;
-	virtual_dma_count = size;
-	virtual_dma_residue = 0;
-	return 0;
-}
-
-static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
-{
-#ifdef FLOPPY_SANITY_CHECK
-	if (CROSS_64KB(addr, size)) {
-		printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
-		return -1;
-	}
-#endif
-	/* actual, physical DMA */
-	doing_pdma = 0;
-	clear_dma_ff(FLOPPY_DMA);
-	set_dma_mode(FLOPPY_DMA,mode);
-	set_dma_addr(FLOPPY_DMA,virt_to_bus(addr));
-	set_dma_count(FLOPPY_DMA,size);
-	enable_dma(FLOPPY_DMA);
-	return 0;
-}
-
-static struct fd_routine_l {
-	int (*_request_dma)(unsigned int dmanr, const char * device_id);
-	void (*_free_dma)(unsigned int dmanr);
-	int (*_get_dma_residue)(unsigned int dummy);
-	unsigned long (*_dma_mem_alloc) (unsigned long size);
-	int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
-} fd_routine[] = {
-	{
-		request_dma,
-		free_dma,
-		get_dma_residue,
-		dma_mem_alloc,
-		hard_dma_setup
-	},
-	{
-		vdma_request_dma,
-		vdma_nop,
-		vdma_get_dma_residue,
-		vdma_mem_alloc,
-		vdma_dma_setup
-	}
-};
-
-
-static int FDC1 = 0x3f0; /* Lies.  Floppy controller is memory mapped, not io mapped */
-static int FDC2 = -1;
-
-#define FLOPPY0_TYPE	0
-#define FLOPPY1_TYPE	0
-
-#define N_FDC 1
-#define N_DRIVE 8
-
-#define EXTRA_FLOPPY_PARAMS
-
-#endif /* __ASM_PARISC_FLOPPY_H */
diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h
deleted file mode 100644
index 0c705c3a55ef..000000000000
--- a/include/asm-parisc/futex.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef _ASM_PARISC_FUTEX_H
-#define _ASM_PARISC_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-#include <linux/uaccess.h>
-#include <asm/errno.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	pagefault_disable();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-	case FUTEX_OP_ADD:
-	case FUTEX_OP_OR:
-	case FUTEX_OP_ANDN:
-	case FUTEX_OP_XOR:
-	default:
-		ret = -ENOSYS;
-	}
-
-	pagefault_enable();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-/* Non-atomic version */
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
-{
-	int err = 0;
-	int uval;
-
-	/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
-	 * our gateway page, and causes no end of trouble...
-	 */
-	if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
-		return -EFAULT;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	err = get_user(uval, uaddr);
-	if (err) return -EFAULT;
-	if (uval == oldval)
-		err = put_user(newval, uaddr);
-	if (err) return -EFAULT;
-	return uval;
-}
-
-#endif /*__KERNEL__*/
-#endif /*_ASM_PARISC_FUTEX_H*/
diff --git a/include/asm-parisc/grfioctl.h b/include/asm-parisc/grfioctl.h
deleted file mode 100644
index 671e06042b40..000000000000
--- a/include/asm-parisc/grfioctl.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*  Architecture specific parts of HP's STI (framebuffer) driver.
- *  Structures are HP-UX compatible for XFree86 usage.
- * 
- *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *    Copyright (C) 2001 Helge Deller (deller a parisc-linux org)
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __ASM_PARISC_GRFIOCTL_H
-#define __ASM_PARISC_GRFIOCTL_H
-
-/* upper 32 bits of graphics id (HP/UX identifier) */
-
-#define GRFGATOR		8
-#define S9000_ID_S300		9
-#define GRFBOBCAT		9
-#define	GRFCATSEYE		9
-#define S9000_ID_98720		10
-#define GRFRBOX			10
-#define S9000_ID_98550		11
-#define GRFFIREEYE		11
-#define S9000_ID_A1096A		12
-#define GRFHYPERION		12
-#define S9000_ID_FRI		13
-#define S9000_ID_98730		14
-#define GRFDAVINCI		14
-#define S9000_ID_98705		0x26C08070	/* Tigershark */
-#define S9000_ID_98736		0x26D148AB
-#define S9000_ID_A1659A		0x26D1482A	/* CRX 8 plane color (=ELK) */
-#define S9000_ID_ELK		S9000_ID_A1659A
-#define S9000_ID_A1439A		0x26D148EE	/* CRX24 = CRX+ (24-plane color) */
-#define S9000_ID_A1924A		0x26D1488C	/* GRX gray-scale */
-#define S9000_ID_ELM		S9000_ID_A1924A
-#define S9000_ID_98765		0x27480DEF
-#define S9000_ID_ELK_768	0x27482101
-#define S9000_ID_STINGER	0x27A4A402
-#define S9000_ID_TIMBER		0x27F12392	/* Bushmaster (710) Graphics */
-#define S9000_ID_TOMCAT		0x27FCCB6D	/* dual-headed ELK (Dual CRX) */
-#define S9000_ID_ARTIST		0x2B4DED6D	/* Artist (Gecko/712 & 715) onboard Graphics */
-#define S9000_ID_HCRX		0x2BCB015A	/* Hyperdrive/Hyperbowl (A4071A) Graphics */
-#define CRX24_OVERLAY_PLANES	0x920825AA	/* Overlay planes on CRX24 */
-
-#define CRT_ID_ELK_1024		S9000_ID_ELK_768 /* Elk 1024x768  CRX */
-#define CRT_ID_ELK_1280		S9000_ID_A1659A	/* Elk 1280x1024 CRX */
-#define CRT_ID_ELK_1024DB	0x27849CA5      /* Elk 1024x768 double buffer */
-#define CRT_ID_ELK_GS		S9000_ID_A1924A	/* Elk 1280x1024 GreyScale    */
-#define CRT_ID_CRX24		S9000_ID_A1439A	/* Piranha */
-#define CRT_ID_VISUALIZE_EG	0x2D08C0A7      /* Graffiti, A4450A (built-in B132+/B160L) */
-#define CRT_ID_THUNDER		0x2F23E5FC      /* Thunder 1 VISUALIZE 48*/
-#define CRT_ID_THUNDER2		0x2F8D570E      /* Thunder 2 VISUALIZE 48 XP*/
-#define CRT_ID_HCRX		S9000_ID_HCRX	/* Hyperdrive HCRX */
-#define CRT_ID_CRX48Z		S9000_ID_STINGER /* Stinger */
-#define CRT_ID_DUAL_CRX		S9000_ID_TOMCAT	/* Tomcat */
-#define CRT_ID_PVRX		S9000_ID_98705	/* Tigershark */
-#define CRT_ID_TIMBER		S9000_ID_TIMBER	/* Timber (710 builtin) */
-#define CRT_ID_TVRX		S9000_ID_98765	/* TVRX (gto/falcon) */
-#define CRT_ID_ARTIST		S9000_ID_ARTIST	/* Artist */
-#define CRT_ID_SUMMIT		0x2FC1066B      /* Summit FX2, FX4, FX6 ... */
-#define CRT_ID_LEGO		0x35ACDA30	/* Lego FX5, FX10 ... */
-#define CRT_ID_PINNACLE		0x35ACDA16	/* Pinnacle FXe */ 
-
-/* structure for ioctl(GCDESCRIBE) */
-
-#define gaddr_t unsigned long	/* FIXME: PA2.0 (64bit) portable ? */
-
-struct	grf_fbinfo {
-	unsigned int	id;		/* upper 32 bits of graphics id */
-	unsigned int	mapsize;	/* mapped size of framebuffer */
-	unsigned int	dwidth, dlength;/* x and y sizes */
-	unsigned int	width, length;	/* total x and total y size */
-	unsigned int	xlen;		/* x pitch size */
-	unsigned int	bpp, bppu;	/* bits per pixel and used bpp */
-	unsigned int	npl, nplbytes;	/* # of planes and bytes per plane */
-	char		name[32];	/* name of the device (from ROM) */
-	unsigned int	attr;		/* attributes */
-	gaddr_t 	fbbase, regbase;/* framebuffer and register base addr */
-	gaddr_t		regions[6];	/* region bases */
-};
-
-#define	GCID		_IOR('G', 0, int)
-#define	GCON		_IO('G', 1)
-#define	GCOFF		_IO('G', 2)
-#define	GCAON		_IO('G', 3)
-#define	GCAOFF		_IO('G', 4)
-#define	GCMAP		_IOWR('G', 5, int)
-#define	GCUNMAP		_IOWR('G', 6, int)
-#define	GCMAP_HPUX	_IO('G', 5)
-#define	GCUNMAP_HPUX	_IO('G', 6)
-#define	GCLOCK		_IO('G', 7)
-#define	GCUNLOCK	_IO('G', 8)
-#define	GCLOCK_MINIMUM	_IO('G', 9)
-#define	GCUNLOCK_MINIMUM _IO('G', 10)
-#define	GCSTATIC_CMAP	_IO('G', 11)
-#define	GCVARIABLE_CMAP _IO('G', 12)
-#define GCTERM		_IOWR('G',20,int)	/* multi-headed Tomcat */ 
-#define GCDESCRIBE	_IOR('G', 21, struct grf_fbinfo)
-#define GCFASTLOCK	_IO('G', 26)
-
-#endif /* __ASM_PARISC_GRFIOCTL_H */
-
diff --git a/include/asm-parisc/hardirq.h b/include/asm-parisc/hardirq.h
deleted file mode 100644
index ce93133d5112..000000000000
--- a/include/asm-parisc/hardirq.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* hardirq.h: PA-RISC hard IRQ support.
- *
- * Copyright (C) 2001 Matthew Wilcox <matthew@wil.cx>
- *
- * The locking is really quite interesting.  There's a cpu-local
- * count of how many interrupts are being handled, and a global
- * lock.  An interrupt can only be serviced if the global lock
- * is free.  You can't be sure no more interrupts are being
- * serviced until you've acquired the lock and then checked
- * all the per-cpu interrupt counts are all zero.  It's a specialised
- * br_lock, and that's exactly how Sparc does it.  We don't because
- * it's more locking for us.  This way is lock-free in the interrupt path.
- */
-
-#ifndef _PARISC_HARDIRQ_H
-#define _PARISC_HARDIRQ_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned long __softirq_pending; /* set_bit is used on this */
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-void ack_bad_irq(unsigned int irq);
-
-#endif /* _PARISC_HARDIRQ_H */
diff --git a/include/asm-parisc/hardware.h b/include/asm-parisc/hardware.h
deleted file mode 100644
index 4e9626836bab..000000000000
--- a/include/asm-parisc/hardware.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef _PARISC_HARDWARE_H
-#define _PARISC_HARDWARE_H
-
-#include <linux/mod_devicetable.h>
-#include <asm/pdc.h>
-
-#define HWTYPE_ANY_ID		PA_HWTYPE_ANY_ID
-#define HVERSION_ANY_ID		PA_HVERSION_ANY_ID
-#define HVERSION_REV_ANY_ID	PA_HVERSION_REV_ANY_ID
-#define SVERSION_ANY_ID		PA_SVERSION_ANY_ID
-
-struct hp_hardware {
-	unsigned short	hw_type:5;	/* HPHW_xxx */
-	unsigned short	hversion;
-	unsigned long	sversion:28;
-	unsigned short	opt;
-	const char	name[80];	/* The hardware description */
-};
-
-struct parisc_device;
-
-enum cpu_type {
-	pcx	= 0, /* pa7000		pa 1.0  */
-	pcxs	= 1, /* pa7000		pa 1.1a */
-	pcxt	= 2, /* pa7100		pa 1.1b */
-	pcxt_	= 3, /* pa7200	(t')	pa 1.1c */
-	pcxl	= 4, /* pa7100lc	pa 1.1d */
-	pcxl2	= 5, /* pa7300lc	pa 1.1e */
-	pcxu	= 6, /* pa8000		pa 2.0  */
-	pcxu_	= 7, /* pa8200	(u+)	pa 2.0  */
-	pcxw	= 8, /* pa8500		pa 2.0  */
-	pcxw_	= 9, /* pa8600	(w+)	pa 2.0  */
-	pcxw2	= 10, /* pa8700		pa 2.0  */
-	mako	= 11, /* pa8800		pa 2.0  */
-	mako2	= 12  /* pa8900		pa 2.0  */
-};
-
-extern const char * const cpu_name_version[][2]; /* mapping from enum cpu_type to strings */
-
-struct parisc_driver;
-
-struct io_module {
-        volatile uint32_t nothing;		/* reg 0 */
-        volatile uint32_t io_eim;
-        volatile uint32_t io_dc_adata;
-        volatile uint32_t io_ii_cdata;
-        volatile uint32_t io_dma_link;		/* reg 4 */
-        volatile uint32_t io_dma_command;
-        volatile uint32_t io_dma_address;
-        volatile uint32_t io_dma_count;
-        volatile uint32_t io_flex;		/* reg 8 */
-        volatile uint32_t io_spa_address;
-        volatile uint32_t reserved1[2];
-        volatile uint32_t io_command;		/* reg 12 */
-        volatile uint32_t io_status;
-        volatile uint32_t io_control;
-        volatile uint32_t io_data;
-        volatile uint32_t reserved2;		/* reg 16 */
-        volatile uint32_t chain_addr;
-        volatile uint32_t sub_mask_clr;
-        volatile uint32_t reserved3[13];
-        volatile uint32_t undefined[480];
-        volatile uint32_t unpriv[512];
-};
-
-struct bc_module {
-        volatile uint32_t unused1[12];
-        volatile uint32_t io_command;
-        volatile uint32_t io_status;
-        volatile uint32_t io_control;
-        volatile uint32_t unused2[1];
-        volatile uint32_t io_err_resp;
-        volatile uint32_t io_err_info;
-        volatile uint32_t io_err_req;
-        volatile uint32_t unused3[11];
-        volatile uint32_t io_io_low;
-        volatile uint32_t io_io_high;
-};
-
-#define HPHW_NPROC     0 
-#define HPHW_MEMORY    1       
-#define HPHW_B_DMA     2
-#define HPHW_OBSOLETE  3
-#define HPHW_A_DMA     4
-#define HPHW_A_DIRECT  5
-#define HPHW_OTHER     6
-#define HPHW_BCPORT    7
-#define HPHW_CIO       8
-#define HPHW_CONSOLE   9
-#define HPHW_FIO       10
-#define HPHW_BA        11
-#define HPHW_IOA       12
-#define HPHW_BRIDGE    13
-#define HPHW_FABRIC    14
-#define HPHW_MC	       15
-#define HPHW_FAULTY    31
-
-
-/* hardware.c: */
-extern const char *parisc_hardware_description(struct parisc_device_id *id);
-extern enum cpu_type parisc_get_cpu_type(unsigned long hversion);
-
-struct pci_dev;
-
-/* drivers.c: */
-extern struct parisc_device *alloc_pa_dev(unsigned long hpa,
-		struct hardware_path *path);
-extern int register_parisc_device(struct parisc_device *dev);
-extern int register_parisc_driver(struct parisc_driver *driver);
-extern int count_parisc_driver(struct parisc_driver *driver);
-extern int unregister_parisc_driver(struct parisc_driver *driver);
-extern void walk_central_bus(void);
-extern const struct parisc_device *find_pa_parent_type(const struct parisc_device *, int);
-extern void print_parisc_devices(void);
-extern char *print_pa_hwpath(struct parisc_device *dev, char *path);
-extern char *print_pci_hwpath(struct pci_dev *dev, char *path);
-extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
-extern void init_parisc_bus(void);
-extern struct device *hwpath_to_device(struct hardware_path *modpath);
-extern void device_to_hwpath(struct device *dev, struct hardware_path *path);
-
-
-/* inventory.c: */
-extern void do_memory_inventory(void);
-extern void do_device_inventory(void);
-
-#endif /* _PARISC_HARDWARE_H */
diff --git a/include/asm-parisc/hw_irq.h b/include/asm-parisc/hw_irq.h
deleted file mode 100644
index 6707f7df3921..000000000000
--- a/include/asm-parisc/hw_irq.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
-
-/*
- *	linux/include/asm/hw_irq.h
- */
-
-#endif
diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
deleted file mode 100644
index c246ef75017d..000000000000
--- a/include/asm-parisc/ide.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  linux/include/asm-parisc/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the PARISC architecture specific IDE code.
- */
-
-#ifndef __ASM_PARISC_IDE_H
-#define __ASM_PARISC_IDE_H
-
-#ifdef __KERNEL__
-
-#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
-#define ide_release_region(from,extent)		release_region((from), (extent))
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw	insw
-#define __ide_insl	insl
-#define __ide_outsw	outsw
-#define __ide_outsl	outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		*(u16 *)addr = __raw_readw(port);
-		addr += 2;
-	}
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		*(u32 *)addr = __raw_readl(port);
-		addr += 4;
-	}
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		__raw_writew(*(u16 *)addr, port);
-		addr += 2;
-	}
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
-{
-	while (count--) {
-		__raw_writel(*(u32 *)addr, port);
-		addr += 4;
-	}
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_PARISC_IDE_H */
diff --git a/include/asm-parisc/io.h b/include/asm-parisc/io.h
deleted file mode 100644
index 55ddb1842107..000000000000
--- a/include/asm-parisc/io.h
+++ /dev/null
@@ -1,293 +0,0 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
-
-#include <linux/types.h>
-#include <asm/pgtable.h>
-
-extern unsigned long parisc_vmerge_boundary;
-extern unsigned long parisc_vmerge_max_size;
-
-#define BIO_VMERGE_BOUNDARY	parisc_vmerge_boundary
-#define BIO_VMERGE_MAX_SIZE	parisc_vmerge_max_size
-
-#define virt_to_phys(a) ((unsigned long)__pa(a))
-#define phys_to_virt(a) __va(a)
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
-static inline unsigned long isa_bus_to_virt(unsigned long addr) {
-	BUG();
-	return 0;
-}
-
-static inline unsigned long isa_virt_to_bus(void *addr) {
-	BUG();
-	return 0;
-}
-
-/*
- * Memory mapped I/O
- *
- * readX()/writeX() do byteswapping and take an ioremapped address
- * __raw_readX()/__raw_writeX() don't byteswap and take an ioremapped address.
- * gsc_*() don't byteswap and operate on physical addresses;
- *   eg dev->hpa or 0xfee00000.
- */
-
-static inline unsigned char gsc_readb(unsigned long addr)
-{
-	long flags;
-	unsigned char ret;
-
-	__asm__ __volatile__(
-	"	rsm	2,%0\n"
-	"	ldbx	0(%2),%1\n"
-	"	mtsm	%0\n"
-	: "=&r" (flags), "=r" (ret) : "r" (addr) );
-
-	return ret;
-}
-
-static inline unsigned short gsc_readw(unsigned long addr)
-{
-	long flags;
-	unsigned short ret;
-
-	__asm__ __volatile__(
-	"	rsm	2,%0\n"
-	"	ldhx	0(%2),%1\n"
-	"	mtsm	%0\n"
-	: "=&r" (flags), "=r" (ret) : "r" (addr) );
-
-	return ret;
-}
-
-static inline unsigned int gsc_readl(unsigned long addr)
-{
-	u32 ret;
-
-	__asm__ __volatile__(
-	"	ldwax	0(%1),%0\n"
-	: "=r" (ret) : "r" (addr) );
-
-	return ret;
-}
-
-static inline unsigned long long gsc_readq(unsigned long addr)
-{
-	unsigned long long ret;
-
-#ifdef CONFIG_64BIT
-	__asm__ __volatile__(
-	"	ldda	0(%1),%0\n"
-	:  "=r" (ret) : "r" (addr) );
-#else
-	/* two reads may have side effects.. */
-	ret = ((u64) gsc_readl(addr)) << 32;
-	ret |= gsc_readl(addr+4);
-#endif
-	return ret;
-}
-
-static inline void gsc_writeb(unsigned char val, unsigned long addr)
-{
-	long flags;
-	__asm__ __volatile__(
-	"	rsm	2,%0\n"
-	"	stbs	%1,0(%2)\n"
-	"	mtsm	%0\n"
-	: "=&r" (flags) :  "r" (val), "r" (addr) );
-}
-
-static inline void gsc_writew(unsigned short val, unsigned long addr)
-{
-	long flags;
-	__asm__ __volatile__(
-	"	rsm	2,%0\n"
-	"	sths	%1,0(%2)\n"
-	"	mtsm	%0\n"
-	: "=&r" (flags) :  "r" (val), "r" (addr) );
-}
-
-static inline void gsc_writel(unsigned int val, unsigned long addr)
-{
-	__asm__ __volatile__(
-	"	stwas	%0,0(%1)\n"
-	: :  "r" (val), "r" (addr) );
-}
-
-static inline void gsc_writeq(unsigned long long val, unsigned long addr)
-{
-#ifdef CONFIG_64BIT
-	__asm__ __volatile__(
-	"	stda	%0,0(%1)\n"
-	: :  "r" (val), "r" (addr) );
-#else
-	/* two writes may have side effects.. */
-	gsc_writel(val >> 32, addr);
-	gsc_writel(val, addr+4);
-#endif
-}
-
-/*
- * The standard PCI ioremap interfaces
- */
-
-extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-/* Most machines react poorly to I/O-space being cacheable... Instead let's
- * define ioremap() in terms of ioremap_nocache().
- */
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, _PAGE_NO_CACHE);
-}
-#define ioremap_nocache(off, sz)	ioremap((off), (sz))
-
-extern void iounmap(const volatile void __iomem *addr);
-
-static inline unsigned char __raw_readb(const volatile void __iomem *addr)
-{
-	return (*(volatile unsigned char __force *) (addr));
-}
-static inline unsigned short __raw_readw(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned short __force *) addr;
-}
-static inline unsigned int __raw_readl(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned int __force *) addr;
-}
-static inline unsigned long long __raw_readq(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned long long __force *) addr;
-}
-
-static inline void __raw_writeb(unsigned char b, volatile void __iomem *addr)
-{
-	*(volatile unsigned char __force *) addr = b;
-}
-static inline void __raw_writew(unsigned short b, volatile void __iomem *addr)
-{
-	*(volatile unsigned short __force *) addr = b;
-}
-static inline void __raw_writel(unsigned int b, volatile void __iomem *addr)
-{
-	*(volatile unsigned int __force *) addr = b;
-}
-static inline void __raw_writeq(unsigned long long b, volatile void __iomem *addr)
-{
-	*(volatile unsigned long long __force *) addr = b;
-}
-
-/* readb can never be const, so use __fswab instead of le*_to_cpu */
-#define readb(addr) __raw_readb(addr)
-#define readw(addr) __fswab16(__raw_readw(addr))
-#define readl(addr) __fswab32(__raw_readl(addr))
-#define readq(addr) __fswab64(__raw_readq(addr))
-#define writeb(b, addr) __raw_writeb(b, addr)
-#define writew(b, addr) __raw_writew(cpu_to_le16(b), addr)
-#define writel(b, addr) __raw_writel(cpu_to_le32(b), addr)
-#define writeq(b, addr) __raw_writeq(cpu_to_le64(b), addr)
-
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
-#define readq_relaxed(addr) readq(addr)
-
-#define mmiowb() do { } while (0)
-
-void memset_io(volatile void __iomem *addr, unsigned char val, int count);
-void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
-void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
-
-/* Port-space IO */
-
-#define inb_p inb
-#define inw_p inw
-#define inl_p inl
-#define outb_p outb
-#define outw_p outw
-#define outl_p outl
-
-extern unsigned char eisa_in8(unsigned short port);
-extern unsigned short eisa_in16(unsigned short port);
-extern unsigned int eisa_in32(unsigned short port);
-extern void eisa_out8(unsigned char data, unsigned short port);
-extern void eisa_out16(unsigned short data, unsigned short port);
-extern void eisa_out32(unsigned int data, unsigned short port);
-
-#if defined(CONFIG_PCI)
-extern unsigned char inb(int addr);
-extern unsigned short inw(int addr);
-extern unsigned int inl(int addr);
-
-extern void outb(unsigned char b, int addr);
-extern void outw(unsigned short b, int addr);
-extern void outl(unsigned int b, int addr);
-#elif defined(CONFIG_EISA)
-#define inb eisa_in8
-#define inw eisa_in16
-#define inl eisa_in32
-#define outb eisa_out8
-#define outw eisa_out16
-#define outl eisa_out32
-#else
-static inline char inb(unsigned long addr)
-{
-	BUG();
-	return -1;
-}
-
-static inline short inw(unsigned long addr)
-{
-	BUG();
-	return -1;
-}
-
-static inline int inl(unsigned long addr)
-{
-	BUG();
-	return -1;
-}
-
-#define outb(x, y)	BUG()
-#define outw(x, y)	BUG()
-#define outl(x, y)	BUG()
-#endif
-
-/*
- * String versions of in/out ops:
- */
-extern void insb (unsigned long port, void *dst, unsigned long count);
-extern void insw (unsigned long port, void *dst, unsigned long count);
-extern void insl (unsigned long port, void *dst, unsigned long count);
-extern void outsb (unsigned long port, const void *src, unsigned long count);
-extern void outsw (unsigned long port, const void *src, unsigned long count);
-extern void outsl (unsigned long port, const void *src, unsigned long count);
-
-
-/* IO Port space is :      BBiiii   where BB is HBA number. */
-#define IO_SPACE_LIMIT 0x00ffffff
-
-/* PA machines have an MM I/O space from 0xf0000000-0xffffffff in 32
- * bit mode and from 0xfffffffff0000000-0xfffffffffffffff in 64 bit
- * mode (essentially just sign extending.  This macro takes in a 32
- * bit I/O address (still with the leading f) and outputs the correct
- * value for either 32 or 64 bit mode */
-#define F_EXTEND(x) ((unsigned long)((x) | (0xffffffff00000000ULL)))
-
-#include <asm-generic/iomap.h>
-
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)	__va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-#endif
diff --git a/include/asm-parisc/ioctl.h b/include/asm-parisc/ioctl.h
deleted file mode 100644
index ec8efa02beda..000000000000
--- a/include/asm-parisc/ioctl.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *    Copyright (C) 1999,2003 Matthew Wilcox < willy at debian . org >
- *    portions from "linux/ioctl.h for Linux" by H.H. Bergman.
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-
-#ifndef _ASM_PARISC_IOCTL_H
-#define _ASM_PARISC_IOCTL_H
-
-/* ioctl command encoding: 32 bits total, command in lower 16 bits,
- * size of the parameter structure in the lower 14 bits of the
- * upper 16 bits.
- * Encoding the size of the parameter structure in the ioctl request
- * is useful for catching programs compiled with old versions
- * and to avoid overwriting user space outside the user buffer area.
- * The highest 2 bits are reserved for indicating the ``access mode''.
- * NOTE: This limits the max parameter size to 16kB -1 !
- */
-
-/*
- * Direction bits.
- */
-#define _IOC_NONE	0U
-#define _IOC_WRITE	2U
-#define _IOC_READ	1U
-
-#include <asm-generic/ioctl.h>
-
-#endif /* _ASM_PARISC_IOCTL_H */
diff --git a/include/asm-parisc/ioctls.h b/include/asm-parisc/ioctls.h
deleted file mode 100644
index 6747fad07a3e..000000000000
--- a/include/asm-parisc/ioctls.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef __ARCH_PARISC_IOCTLS_H__
-#define __ARCH_PARISC_IOCTLS_H__
-
-#include <asm/ioctl.h>
-
-/* 0x54 is just a magic number to make these relatively unique ('T') */
-
-#define TCGETS		_IOR('T', 16, struct termios) /* TCGETATTR */
-#define TCSETS		_IOW('T', 17, struct termios) /* TCSETATTR */
-#define TCSETSW		_IOW('T', 18, struct termios) /* TCSETATTRD */
-#define TCSETSF		_IOW('T', 19, struct termios) /* TCSETATTRF */
-#define TCGETA		_IOR('T', 1, struct termio)
-#define TCSETA		_IOW('T', 2, struct termio)
-#define TCSETAW		_IOW('T', 3, struct termio)
-#define TCSETAF		_IOW('T', 4, struct termio)
-#define TCSBRK		_IO('T', 5)
-#define TCXONC		_IO('T', 6)
-#define TCFLSH		_IO('T', 7)
-#define TIOCEXCL	0x540C
-#define TIOCNXCL	0x540D
-#define TIOCSCTTY	0x540E
-#define TIOCGPGRP	_IOR('T', 30, int)
-#define TIOCSPGRP	_IOW('T', 29, int)
-#define TIOCOUTQ	0x5411
-#define TIOCSTI		0x5412
-#define TIOCGWINSZ	0x5413
-#define TIOCSWINSZ	0x5414
-#define TIOCMGET	0x5415
-#define TIOCMBIS	0x5416
-#define TIOCMBIC	0x5417
-#define TIOCMSET	0x5418
-#define TIOCGSOFTCAR	0x5419
-#define TIOCSSOFTCAR	0x541A
-#define FIONREAD	0x541B
-#define TIOCINQ		FIONREAD
-#define TIOCLINUX	0x541C
-#define TIOCCONS	0x541D
-#define TIOCGSERIAL	0x541E
-#define TIOCSSERIAL	0x541F
-#define TIOCPKT		0x5420
-#define FIONBIO		0x5421
-#define TIOCNOTTY	0x5422
-#define TIOCSETD	0x5423
-#define TIOCGETD	0x5424
-#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
-#define TIOCSBRK	0x5427  /* BSD compatibility */
-#define TIOCCBRK	0x5428  /* BSD compatibility */
-#define TIOCGSID	_IOR('T', 20, int) /* Return the session ID of FD */
-#define TCGETS2		_IOR('T',0x2A, struct termios2)
-#define TCSETS2		_IOW('T',0x2B, struct termios2)
-#define TCSETSW2	_IOW('T',0x2C, struct termios2)
-#define TCSETSF2	_IOW('T',0x2D, struct termios2)
-#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
-#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
-
-#define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
-#define FIOCLEX		0x5451
-#define FIOASYNC	0x5452
-#define TIOCSERCONFIG	0x5453
-#define TIOCSERGWILD	0x5454
-#define TIOCSERSWILD	0x5455
-#define TIOCGLCKTRMIOS	0x5456
-#define TIOCSLCKTRMIOS	0x5457
-#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
-#define TIOCSERGETLSR   0x5459 /* Get line status register */
-#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
-#define TIOCSERSETMULTI 0x545B /* Set multiport config */
-
-#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
-#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
-#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
-#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
-#define FIOQSIZE	0x5460	/* Get exact space used by quota */
-
-#define TIOCSTART	0x5461
-#define TIOCSTOP	0x5462
-#define TIOCSLTC	0x5462
-
-/* Used for packet mode */
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-
-#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
-
-#endif /* _ASM_PARISC_IOCTLS_H */
diff --git a/include/asm-parisc/ipcbuf.h b/include/asm-parisc/ipcbuf.h
deleted file mode 100644
index bd956c425785..000000000000
--- a/include/asm-parisc/ipcbuf.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __PARISC_IPCBUF_H__
-#define __PARISC_IPCBUF_H__
-
-/*
- * The ipc64_perm structure for PA-RISC is almost identical to
- * kern_ipc_perm as we have always had 32-bit UIDs and GIDs in the kernel.
- * 'seq' has been changed from long to int so that it's the same size
- * on 64-bit kernels as on 32-bit ones.
- */
-
-struct ipc64_perm
-{
-	key_t           key;
-	uid_t           uid;
-	gid_t           gid;
-	uid_t           cuid;
-	gid_t           cgid;
-	unsigned short int	__pad1;
-	mode_t          mode;
-	unsigned short int	__pad2;
-	unsigned short int	seq;
-	unsigned int	__pad3;
-	unsigned long long int __unused1;
-	unsigned long long int __unused2;
-};
-
-#endif /* __PARISC_IPCBUF_H__ */
diff --git a/include/asm-parisc/irq.h b/include/asm-parisc/irq.h
deleted file mode 100644
index 399c81981ed5..000000000000
--- a/include/asm-parisc/irq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * include/asm-parisc/irq.h
- *
- * Copyright 2005 Matthew Wilcox <matthew@wil.cx>
- */
-
-#ifndef _ASM_PARISC_IRQ_H
-#define _ASM_PARISC_IRQ_H
-
-#include <linux/cpumask.h>
-#include <asm/types.h>
-
-#define NO_IRQ		(-1)
-
-#ifdef CONFIG_GSC
-#define GSC_IRQ_BASE	16
-#define GSC_IRQ_MAX	63
-#define CPU_IRQ_BASE	64
-#else
-#define CPU_IRQ_BASE	16
-#endif
-
-#define TIMER_IRQ	(CPU_IRQ_BASE + 0)
-#define	IPI_IRQ		(CPU_IRQ_BASE + 1)
-#define CPU_IRQ_MAX	(CPU_IRQ_BASE + (BITS_PER_LONG - 1))
-
-#define NR_IRQS		(CPU_IRQ_MAX + 1)
-
-static __inline__ int irq_canonicalize(int irq)
-{
-	return (irq == 2) ? 9 : irq;
-}
-
-struct irq_chip;
-
-/*
- * Some useful "we don't have to do anything here" handlers.  Should
- * probably be provided by the generic code.
- */
-void no_ack_irq(unsigned int irq);
-void no_end_irq(unsigned int irq);
-void cpu_ack_irq(unsigned int irq);
-void cpu_end_irq(unsigned int irq);
-
-extern int txn_alloc_irq(unsigned int nbits);
-extern int txn_claim_irq(int);
-extern unsigned int txn_alloc_data(unsigned int);
-extern unsigned long txn_alloc_addr(unsigned int);
-extern unsigned long txn_affinity_addr(unsigned int irq, int cpu);
-
-extern int cpu_claim_irq(unsigned int irq, struct irq_chip *, void *);
-extern int cpu_check_affinity(unsigned int irq, cpumask_t *dest);
-
-/* soft power switch support (power.c) */
-extern struct tasklet_struct power_tasklet;
-
-#endif	/* _ASM_PARISC_IRQ_H */
diff --git a/include/asm-parisc/irq_regs.h b/include/asm-parisc/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/include/asm-parisc/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/include/asm-parisc/kdebug.h b/include/asm-parisc/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/include/asm-parisc/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/include/asm-parisc/kmap_types.h b/include/asm-parisc/kmap_types.h
deleted file mode 100644
index 806aae3c5338..000000000000
--- a/include/asm-parisc/kmap_types.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
-#endif
-
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
-
-#undef D
-
-#endif
diff --git a/include/asm-parisc/led.h b/include/asm-parisc/led.h
deleted file mode 100644
index c3405ab9d60a..000000000000
--- a/include/asm-parisc/led.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef LED_H
-#define LED_H
-
-#define	LED7		0x80		/* top (or furthest right) LED */
-#define	LED6		0x40
-#define	LED5		0x20
-#define	LED4		0x10
-#define	LED3		0x08
-#define	LED2		0x04
-#define	LED1		0x02
-#define	LED0		0x01		/* bottom (or furthest left) LED */
-
-#define	LED_LAN_TX	LED0		/* for LAN transmit activity */
-#define	LED_LAN_RCV	LED1		/* for LAN receive activity */
-#define	LED_DISK_IO	LED2		/* for disk activity */
-#define	LED_HEARTBEAT	LED3		/* heartbeat */
-
-/* values for pdc_chassis_lcd_info_ret_block.model: */
-#define DISPLAY_MODEL_LCD  0		/* KittyHawk LED or LCD */
-#define DISPLAY_MODEL_NONE 1		/* no LED or LCD */
-#define DISPLAY_MODEL_LASI 2		/* LASI style 8 bit LED */
-#define DISPLAY_MODEL_OLD_ASP 0x7F	/* faked: ASP style 8 x 1 bit LED (only very old ASP versions) */
-
-#define LED_CMD_REG_NONE 0		/* NULL == no addr for the cmd register */
-
-/* register_led_driver() */
-int __init register_led_driver(int model, unsigned long cmd_reg, unsigned long data_reg);
-
-/* registers the LED regions for procfs */
-void __init register_led_regions(void);
-
-#ifdef CONFIG_CHASSIS_LCD_LED
-/* writes a string to the LCD display (if possible on this h/w) */
-int lcd_print(const char *str);
-#else
-#define lcd_print(str)
-#endif
-
-/* main LED initialization function (uses PDC) */ 
-int __init led_init(void);
-
-#endif /* LED_H */
diff --git a/include/asm-parisc/linkage.h b/include/asm-parisc/linkage.h
deleted file mode 100644
index 0b19a7242d0c..000000000000
--- a/include/asm-parisc/linkage.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __ASM_PARISC_LINKAGE_H
-#define __ASM_PARISC_LINKAGE_H
-
-#ifndef __ALIGN
-#define __ALIGN         .align 4
-#define __ALIGN_STR     ".align 4"
-#endif
-
-/*
- * In parisc assembly a semicolon marks a comment while a
- * exclamation mark is used to separate independent lines.
- */
-#ifdef __ASSEMBLY__
-
-#define ENTRY(name) \
-	.export name !\
-	ALIGN !\
-name:
-
-#ifdef CONFIG_64BIT
-#define ENDPROC(name) \
-	END(name)
-#else
-#define ENDPROC(name) \
-	.type name, @function !\
-	END(name)
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-#endif  /* __ASM_PARISC_LINKAGE_H */
diff --git a/include/asm-parisc/local.h b/include/asm-parisc/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/include/asm-parisc/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/include/asm-parisc/machdep.h b/include/asm-parisc/machdep.h
deleted file mode 100644
index a231c97d703e..000000000000
--- a/include/asm-parisc/machdep.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _PARISC_MACHDEP_H
-#define _PARISC_MACHDEP_H
-
-#include <linux/notifier.h>
-
-#define	MACH_RESTART	1
-#define	MACH_HALT	2
-#define MACH_POWER_ON	3
-#define	MACH_POWER_OFF	4
-
-extern struct notifier_block *mach_notifier;
-extern void pa7300lc_init(void);
-
-extern void (*cpu_lpmc)(int, struct pt_regs *);
-
-#endif
diff --git a/include/asm-parisc/mc146818rtc.h b/include/asm-parisc/mc146818rtc.h
deleted file mode 100644
index adf41631449f..000000000000
--- a/include/asm-parisc/mc146818rtc.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c */
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-parisc/mckinley.h b/include/asm-parisc/mckinley.h
deleted file mode 100644
index d1ea6f12915e..000000000000
--- a/include/asm-parisc/mckinley.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef ASM_PARISC_MCKINLEY_H
-#define ASM_PARISC_MCKINLEY_H
-#ifdef __KERNEL__
-
-/* declared in arch/parisc/kernel/setup.c */
-extern struct proc_dir_entry * proc_mckinley_root;
-
-#endif /*__KERNEL__*/
-#endif /*ASM_PARISC_MCKINLEY_H*/
diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h
deleted file mode 100644
index defe752cc996..000000000000
--- a/include/asm-parisc/mman.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef __PARISC_MMAN_H__
-#define __PARISC_MMAN_H__
-
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x03		/* Mask for type of mapping */
-#define MAP_FIXED	0x04		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x10		/* don't use a file */
-
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_GROWSDOWN	0x8000		/* stack-like segment */
-#define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x20000		/* do not block on IO */
-
-#define MS_SYNC		1		/* synchronous memory sync */
-#define MS_ASYNC	2		/* sync memory asynchronously */
-#define MS_INVALIDATE	4		/* invalidate the caches */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#define MADV_NORMAL     0               /* no further special treatment */
-#define MADV_RANDOM     1               /* expect random page references */
-#define MADV_SEQUENTIAL 2               /* expect sequential page references */
-#define MADV_WILLNEED   3               /* will need these pages */
-#define MADV_DONTNEED   4               /* don't need these pages */
-#define MADV_SPACEAVAIL 5               /* insure that resources are reserved */
-#define MADV_VPS_PURGE  6               /* Purge pages from VM page cache */
-#define MADV_VPS_INHERIT 7              /* Inherit parents page size */
-
-/* common/generic parameters */
-#define MADV_REMOVE	9		/* remove these pages & resources */
-#define MADV_DONTFORK	10		/* don't inherit across fork */
-#define MADV_DOFORK	11		/* do inherit across fork */
-
-/* The range 12-64 is reserved for page size specification. */
-#define MADV_4K_PAGES   12              /* Use 4K pages  */
-#define MADV_16K_PAGES  14              /* Use 16K pages */
-#define MADV_64K_PAGES  16              /* Use 64K pages */
-#define MADV_256K_PAGES 18              /* Use 256K pages */
-#define MADV_1M_PAGES   20              /* Use 1 Megabyte pages */
-#define MADV_4M_PAGES   22              /* Use 4 Megabyte pages */
-#define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
-#define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
-
-/* compatibility flags */
-#define MAP_FILE	0
-#define MAP_VARIABLE	0
-
-#endif /* __PARISC_MMAN_H__ */
diff --git a/include/asm-parisc/mmu.h b/include/asm-parisc/mmu.h
deleted file mode 100644
index 6a310cf8b734..000000000000
--- a/include/asm-parisc/mmu.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _PARISC_MMU_H_
-#define _PARISC_MMU_H_
-
-/* On parisc, we store the space id here */
-typedef unsigned long mm_context_t;
-
-#endif /* _PARISC_MMU_H_ */
diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h
deleted file mode 100644
index 85856c74ad1d..000000000000
--- a/include/asm-parisc/mmu_context.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef __PARISC_MMU_CONTEXT_H
-#define __PARISC_MMU_CONTEXT_H
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <asm/atomic.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm-generic/mm_hooks.h>
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-/* on PA-RISC, we actually have enough contexts to justify an allocator
- * for them.  prumpf */
-
-extern unsigned long alloc_sid(void);
-extern void free_sid(unsigned long);
-
-static inline int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	BUG_ON(atomic_read(&mm->mm_users) != 1);
-
-	mm->context = alloc_sid();
-	return 0;
-}
-
-static inline void
-destroy_context(struct mm_struct *mm)
-{
-	free_sid(mm->context);
-	mm->context = 0;
-}
-
-static inline void load_context(mm_context_t context)
-{
-	mtsp(context, 3);
-#if SPACEID_SHIFT == 0
-	mtctl(context << 1,8);
-#else
-	mtctl(context >> (SPACEID_SHIFT - 1),8);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
-{
-
-	if (prev != next) {
-		mtctl(__pa(next->pgd), 25);
-		load_context(next->context);
-	}
-}
-
-#define deactivate_mm(tsk,mm)	do { } while (0)
-
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-	/*
-	 * Activate_mm is our one chance to allocate a space id
-	 * for a new mm created in the exec path. There's also
-	 * some lazy tlb stuff, which is currently dead code, but
-	 * we only allocate a space id if one hasn't been allocated
-	 * already, so we should be OK.
-	 */
-
-	BUG_ON(next == &init_mm); /* Should never happen */
-
-	if (next->context == 0)
-	    next->context = alloc_sid();
-
-	switch_mm(prev,next,current);
-}
-#endif
diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h
deleted file mode 100644
index 9608d2cf214a..000000000000
--- a/include/asm-parisc/mmzone.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef _PARISC_MMZONE_H
-#define _PARISC_MMZONE_H
-
-#ifdef CONFIG_DISCONTIGMEM
-
-#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
-extern int npmem_ranges;
-
-struct node_map_data {
-    pg_data_t pg_data;
-};
-
-extern struct node_map_data node_data[];
-
-#define NODE_DATA(nid)          (&node_data[nid].pg_data)
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
-})
-
-/* We have these possible memory map layouts:
- * Astro: 0-3.75, 67.75-68, 4-64
- * zx1: 0-1, 257-260, 4-256
- * Stretch (N-class): 0-2, 4-32, 34-xxx
- */
-
-/* Since each 1GB can only belong to one region (node), we can create
- * an index table for pfn to nid lookup; each entry in pfnnid_map 
- * represents 1GB, and contains the node that the memory belongs to. */
-
-#define PFNNID_SHIFT (30 - PAGE_SHIFT)
-#define PFNNID_MAP_MAX  512     /* support 512GB */
-extern unsigned char pfnnid_map[PFNNID_MAP_MAX];
-
-#ifndef CONFIG_64BIT
-#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
-#else
-/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
-#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
-#endif
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-	unsigned int i;
-	unsigned char r;
-
-	if (unlikely(pfn_is_io(pfn)))
-		return 0;
-
-	i = pfn >> PFNNID_SHIFT;
-	BUG_ON(i >= sizeof(pfnnid_map) / sizeof(pfnnid_map[0]));
-	r = pfnnid_map[i];
-	BUG_ON(r == 0xff);
-
-	return (int)r;
-}
-
-static inline int pfn_valid(int pfn)
-{
-	int nid = pfn_to_nid(pfn);
-
-	if (nid >= 0)
-		return (pfn < node_end_pfn(nid));
-	return 0;
-}
-
-#else /* !CONFIG_DISCONTIGMEM */
-#define MAX_PHYSMEM_RANGES 	1 
-#endif
-#endif /* _PARISC_MMZONE_H */
diff --git a/include/asm-parisc/module.h b/include/asm-parisc/module.h
deleted file mode 100644
index c2cb49e934c1..000000000000
--- a/include/asm-parisc/module.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _ASM_PARISC_MODULE_H
-#define _ASM_PARISC_MODULE_H
-/*
- * This file contains the parisc architecture specific module code.
- */
-#ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Addr Elf64_Addr
-#define Elf_Rela Elf64_Rela
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-#define Elf_Addr Elf32_Addr
-#define Elf_Rela Elf32_Rela
-#endif
-
-struct unwind_table;
-
-struct mod_arch_specific
-{
-	unsigned long got_offset, got_count, got_max;
-	unsigned long fdesc_offset, fdesc_count, fdesc_max;
-	unsigned long stub_offset, stub_count, stub_max;
-	unsigned long init_stub_offset, init_stub_count, init_stub_max;
-	int unwind_section;
-	struct unwind_table *unwind;
-};
-
-#endif /* _ASM_PARISC_MODULE_H */
diff --git a/include/asm-parisc/msgbuf.h b/include/asm-parisc/msgbuf.h
deleted file mode 100644
index fe88f2649418..000000000000
--- a/include/asm-parisc/msgbuf.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef _PARISC_MSGBUF_H
-#define _PARISC_MSGBUF_H
-
-/* 
- * The msqid64_ds structure for parisc architecture, copied from sparc.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-#ifndef CONFIG_64BIT
-	unsigned int   __pad1;
-#endif
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-#ifndef CONFIG_64BIT
-	unsigned int   __pad2;
-#endif
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#ifndef CONFIG_64BIT
-	unsigned int   __pad3;
-#endif
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned int  msg_cbytes;	/* current number of bytes on queue */
-	unsigned int  msg_qnum;	/* number of messages in queue */
-	unsigned int  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned int  __unused1;
-	unsigned int  __unused2;
-};
-
-#endif /* _PARISC_MSGBUF_H */
diff --git a/include/asm-parisc/mutex.h b/include/asm-parisc/mutex.h
deleted file mode 100644
index 458c1f7fbc18..000000000000
--- a/include/asm-parisc/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
deleted file mode 100644
index c3941f09a878..000000000000
--- a/include/asm-parisc/page.h
+++ /dev/null
@@ -1,173 +0,0 @@
-#ifndef _PARISC_PAGE_H
-#define _PARISC_PAGE_H
-
-#include <linux/const.h>
-
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-# define PAGE_SHIFT	12
-#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
-# define PAGE_SHIFT	14
-#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
-# define PAGE_SHIFT	16
-#else
-# error "unknown default kernel page size"
-#endif
-#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
-
-
-#ifndef __ASSEMBLY__
-
-#include <asm/types.h>
-#include <asm/cache.h>
-
-#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
-#define copy_page(to,from)      copy_user_page_asm((void *)(to), (void *)(from))
-
-struct page;
-
-void copy_user_page_asm(void *to, void *from);
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-			   struct page *pg);
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
-
-/*
- * These are used to make use of C type-checking..
- */
-#define STRICT_MM_TYPECHECKS
-#ifdef STRICT_MM_TYPECHECKS
-typedef struct { unsigned long pte;
-#if !defined(CONFIG_64BIT)
-                 unsigned long future_flags;
- /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY
-	 to 2, but then strangely the identical 32bit kernel boots on a
-	 c3000(pa20), but not any longer on a 715(pa11).
-	 Still investigating... HelgeD.
-  */
-#endif
-} pte_t; /* either 32 or 64bit */
-
-/* NOTE: even on 64 bits, these entries are __u32 because we allocate
- * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
-typedef struct { __u32 pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-
-#define pte_val(x)	((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
-#define pmd_val(x)	((x).pmd + 0)
-#define pgd_val(x)	((x).pgd + 0)
-#define pgprot_val(x)	((x).pgprot)
-
-#define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { (x) } )
-#define __pgd(x)	((pgd_t) { (x) } )
-#define __pgprot(x)	((pgprot_t) { (x) } )
-
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
-#else
-/*
- * .. while these make it easier on the compiler
- */
-typedef unsigned long pte_t;
-typedef         __u32 pmd_t;
-typedef         __u32 pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pmd_val(x)      (x)
-#define pgd_val(x)      (x)
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pmd(x)	(x)
-#define __pgd(x)        (x)
-#define __pgprot(x)     (x)
-
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-typedef struct page *pgtable_t;
-
-typedef struct __physmem_range {
-	unsigned long start_pfn;
-	unsigned long pages;       /* PAGE_SIZE pages */
-} physmem_range_t;
-
-extern physmem_range_t pmem_ranges[];
-extern int npmem_ranges;
-
-#endif /* !__ASSEMBLY__ */
-
-/* WARNING: The definitions below must match exactly to sizeof(pte_t)
- * etc
- */
-#ifdef CONFIG_64BIT
-#define BITS_PER_PTE_ENTRY	3
-#define BITS_PER_PMD_ENTRY	2
-#define BITS_PER_PGD_ENTRY	2
-#else
-#define BITS_PER_PTE_ENTRY	3
-#define BITS_PER_PMD_ENTRY	2
-#define BITS_PER_PGD_ENTRY	BITS_PER_PMD_ENTRY
-#endif
-#define PGD_ENTRY_SIZE	(1UL << BITS_PER_PGD_ENTRY)
-#define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
-#define PTE_ENTRY_SIZE	(1UL << BITS_PER_PTE_ENTRY)
-
-#define LINUX_GATEWAY_SPACE     0
-
-/* This governs the relationship between virtual and physical addresses.
- * If you alter it, make sure to take care of our various fixed mapping
- * segments in fixmap.h */
-#ifdef CONFIG_64BIT
-#define __PAGE_OFFSET	(0x40000000)	/* 1GB */
-#else
-#define __PAGE_OFFSET	(0x10000000)	/* 256MB */
-#endif
-
-#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
-
-/* The size of the gateway page (we leave lots of room for expansion) */
-#define GATEWAY_PAGE_SIZE	0x4000
-
-/* The start of the actual kernel binary---used in vmlinux.lds.S
- * Leave some space after __PAGE_OFFSET for detecting kernel null
- * ptr derefs */
-#define KERNEL_BINARY_TEXT_START	(__PAGE_OFFSET + 0x100000)
-
-/* These macros don't work for 64-bit C code -- don't allow in C at all */
-#ifdef __ASSEMBLY__
-#   define PA(x)	((x)-__PAGE_OFFSET)
-#   define VA(x)	((x)+__PAGE_OFFSET)
-#endif
-#define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
-#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
-
-#ifndef CONFIG_DISCONTIGMEM
-#define pfn_valid(pfn)		((pfn) < max_mapnr)
-#endif /* CONFIG_DISCONTIGMEM */
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HPAGE_SHIFT		22	/* 4MB (is this fixed?) */
-#define HPAGE_SIZE      	((1UL) << HPAGE_SHIFT)
-#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-#endif
-
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-#define virt_to_page(kaddr)     pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-
-#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
-
-#endif /* _PARISC_PAGE_H */
diff --git a/include/asm-parisc/param.h b/include/asm-parisc/param.h
deleted file mode 100644
index 32e03d877858..000000000000
--- a/include/asm-parisc/param.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _ASMPARISC_PARAM_H
-#define _ASMPARISC_PARAM_H
-
-#ifdef __KERNEL__
-#define HZ		CONFIG_HZ
-#define USER_HZ		100		/* some user API use "ticks" */
-#define CLOCKS_PER_SEC	(USER_HZ)	/* like times() */
-#endif
-
-#ifndef HZ
-#define HZ 100
-#endif
-
-#define EXEC_PAGESIZE	4096
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
-#endif
diff --git a/include/asm-parisc/parisc-device.h b/include/asm-parisc/parisc-device.h
deleted file mode 100644
index 7aa13f2add7a..000000000000
--- a/include/asm-parisc/parisc-device.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef _ASM_PARISC_PARISC_DEVICE_H_
-#define _ASM_PARISC_PARISC_DEVICE_H_
-
-#include <linux/device.h>
-
-struct parisc_device {
-	struct resource hpa;		/* Hard Physical Address */
-	struct parisc_device_id id;
-	struct parisc_driver *driver;	/* Driver for this device */
-	char		name[80];	/* The hardware description */
-	int		irq;
-	int		aux_irq;	/* Some devices have a second IRQ */
-
-	char		hw_path;        /* The module number on this bus */
-	unsigned int	num_addrs;	/* some devices have additional address ranges. */
-	unsigned long	*addr;          /* which will be stored here */
- 
-#ifdef CONFIG_64BIT
-	/* parms for pdc_pat_cell_module() call */
-	unsigned long	pcell_loc;	/* Physical Cell location */
-	unsigned long	mod_index;	/* PAT specific - Misc Module info */
-
-	/* generic info returned from pdc_pat_cell_module() */
-	unsigned long	mod_info;	/* PAT specific - Misc Module info */
-	unsigned long	pmod_loc;	/* physical Module location */
-#endif
-	u64		dma_mask;	/* DMA mask for I/O */
-	struct device 	dev;
-};
-
-struct parisc_driver {
-	struct parisc_driver *next;
-	char *name; 
-	const struct parisc_device_id *id_table;
-	int (*probe) (struct parisc_device *dev); /* New device discovered */
-	int (*remove) (struct parisc_device *dev);
-	struct device_driver drv;
-};
-
-
-#define to_parisc_device(d)	container_of(d, struct parisc_device, dev)
-#define to_parisc_driver(d)	container_of(d, struct parisc_driver, drv)
-#define parisc_parent(d)	to_parisc_device(d->dev.parent)
-
-static inline char *parisc_pathname(struct parisc_device *d)
-{
-	return d->dev.bus_id;
-}
-
-static inline void
-parisc_set_drvdata(struct parisc_device *d, void *p)
-{
-	dev_set_drvdata(&d->dev, p);
-}
-
-static inline void *
-parisc_get_drvdata(struct parisc_device *d)
-{
-	return dev_get_drvdata(&d->dev);
-}
-
-extern struct bus_type parisc_bus_type;
-
-#endif /*_ASM_PARISC_PARISC_DEVICE_H_*/
diff --git a/include/asm-parisc/parport.h b/include/asm-parisc/parport.h
deleted file mode 100644
index 00d9cc3e7b97..000000000000
--- a/include/asm-parisc/parport.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* 
- *
- * parport.h: ia32-compatible parport initialisation
- *
- * This file should only be included by drivers/parport/parport_pc.c.
- */
-#ifndef _ASM_PARPORT_H
-#define _ASM_PARPORT_H 1
-
-
-static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma)
-{
-	/* nothing ! */
-	return 0;
-}
-
-
-#endif /* !(_ASM_PARPORT_H) */
diff --git a/include/asm-parisc/pci.h b/include/asm-parisc/pci.h
deleted file mode 100644
index 4ba868f44a5e..000000000000
--- a/include/asm-parisc/pci.h
+++ /dev/null
@@ -1,294 +0,0 @@
-#ifndef __ASM_PARISC_PCI_H
-#define __ASM_PARISC_PCI_H
-
-#include <asm/scatterlist.h>
-
-
-
-/*
-** HP PCI platforms generally support multiple bus adapters.
-**    (workstations 1-~4, servers 2-~32)
-**
-** Newer platforms number the busses across PCI bus adapters *sparsely*.
-** E.g. 0, 8, 16, ...
-**
-** Under a PCI bus, most HP platforms support PPBs up to two or three
-** levels deep. See "Bit3" product line. 
-*/
-#define PCI_MAX_BUSSES	256
-
-
-/* To be used as: mdelay(pci_post_reset_delay);
- *
- * post_reset is the time the kernel should stall to prevent anyone from
- * accessing the PCI bus once #RESET is de-asserted. 
- * PCI spec somewhere says 1 second but with multi-PCI bus systems,
- * this makes the boot time much longer than necessary.
- * 20ms seems to work for all the HP PCI implementations to date.
- */
-#define pci_post_reset_delay 50
-
-
-/*
-** pci_hba_data (aka H2P_OBJECT in HP/UX)
-**
-** This is the "common" or "base" data structure which HBA drivers
-** (eg Dino or LBA) are required to place at the top of their own
-** platform_data structure.  I've heard this called "C inheritance" too.
-**
-** Data needed by pcibios layer belongs here.
-*/
-struct pci_hba_data {
-	void __iomem   *base_addr;	/* aka Host Physical Address */
-	const struct parisc_device *dev; /* device from PA bus walk */
-	struct pci_bus *hba_bus;	/* primary PCI bus below HBA */
-	int		hba_num;	/* I/O port space access "key" */
-	struct resource bus_num;	/* PCI bus numbers */
-	struct resource io_space;	/* PIOP */
-	struct resource lmmio_space;	/* bus addresses < 4Gb */
-	struct resource elmmio_space;	/* additional bus addresses < 4Gb */
-	struct resource gmmio_space;	/* bus addresses > 4Gb */
-
-	/* NOTE: Dino code assumes it can use *all* of the lmmio_space,
-	 * elmmio_space and gmmio_space as a contiguous array of
-	 * resources.  This #define represents the array size */
-	#define DINO_MAX_LMMIO_RESOURCES	3
-
-	unsigned long   lmmio_space_offset;  /* CPU view - PCI view */
-	void *          iommu;          /* IOMMU this device is under */
-	/* REVISIT - spinlock to protect resources? */
-
-	#define HBA_NAME_SIZE 16
-	char io_name[HBA_NAME_SIZE];
-	char lmmio_name[HBA_NAME_SIZE];
-	char elmmio_name[HBA_NAME_SIZE];
-	char gmmio_name[HBA_NAME_SIZE];
-};
-
-#define HBA_DATA(d)		((struct pci_hba_data *) (d))
-
-/* 
-** We support 2^16 I/O ports per HBA.  These are set up in the form
-** 0xbbxxxx, where bb is the bus number and xxxx is the I/O port
-** space address.
-*/
-#define HBA_PORT_SPACE_BITS	16
-
-#define HBA_PORT_BASE(h)	((h) << HBA_PORT_SPACE_BITS)
-#define HBA_PORT_SPACE_SIZE	(1UL << HBA_PORT_SPACE_BITS)
-
-#define PCI_PORT_HBA(a)		((a) >> HBA_PORT_SPACE_BITS)
-#define PCI_PORT_ADDR(a)	((a) & (HBA_PORT_SPACE_SIZE - 1))
-
-#ifdef CONFIG_64BIT
-#define PCI_F_EXTEND		0xffffffff00000000UL
-#define PCI_IS_LMMIO(hba,a)	pci_is_lmmio(hba,a)
-
-/* We need to know if an address is LMMMIO or GMMIO.
- * LMMIO requires mangling and GMMIO we must use as-is.
- */
-static __inline__  int pci_is_lmmio(struct pci_hba_data *hba, unsigned long a)
-{
-	return(((a) & PCI_F_EXTEND) == PCI_F_EXTEND);
-}
-
-/*
-** Convert between PCI (IO_VIEW) addresses and processor (PA_VIEW) addresses.
-** See pci.c for more conversions used by Generic PCI code.
-**
-** Platform characteristics/firmware guarantee that
-**	(1) PA_VIEW - IO_VIEW = lmmio_offset for both LMMIO and ELMMIO
-**	(2) PA_VIEW == IO_VIEW for GMMIO
-*/
-#define PCI_BUS_ADDR(hba,a)	(PCI_IS_LMMIO(hba,a)	\
-		?  ((a) - hba->lmmio_space_offset)	/* mangle LMMIO */ \
-		: (a))					/* GMMIO */
-#define PCI_HOST_ADDR(hba,a)	(((a) & PCI_F_EXTEND) == 0 \
-		? (a) + hba->lmmio_space_offset \
-		: (a))
-
-#else	/* !CONFIG_64BIT */
-
-#define PCI_BUS_ADDR(hba,a)	(a)
-#define PCI_HOST_ADDR(hba,a)	(a)
-#define PCI_F_EXTEND		0UL
-#define PCI_IS_LMMIO(hba,a)	(1)	/* 32-bit doesn't support GMMIO */
-
-#endif /* !CONFIG_64BIT */
-
-/*
-** KLUGE: linux/pci.h include asm/pci.h BEFORE declaring struct pci_bus
-** (This eliminates some of the warnings).
-*/
-struct pci_bus;
-struct pci_dev;
-
-/*
- * If the PCI device's view of memory is the same as the CPU's view of memory,
- * PCI_DMA_BUS_IS_PHYS is true.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#ifdef CONFIG_PA20
-/* All PA-2.0 machines have an IOMMU. */
-#define PCI_DMA_BUS_IS_PHYS	0
-#define parisc_has_iommu()	do { } while (0)
-#else
-
-#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)
-extern int parisc_bus_is_phys; 	/* in arch/parisc/kernel/setup.c */
-#define PCI_DMA_BUS_IS_PHYS	parisc_bus_is_phys
-#define parisc_has_iommu()	do { parisc_bus_is_phys = 0; } while (0)
-#else
-#define PCI_DMA_BUS_IS_PHYS	1
-#define parisc_has_iommu()	do { } while (0)
-#endif
-
-#endif	/* !CONFIG_PA20 */
-
-
-/*
-** Most PCI devices (eg Tulip, NCR720) also export the same registers
-** to both MMIO and I/O port space.  Due to poor performance of I/O Port
-** access under HP PCI bus adapters, strongly recommend the use of MMIO
-** address space.
-**
-** While I'm at it more PA programming notes:
-**
-** 1) MMIO stores (writes) are posted operations. This means the processor
-**    gets an "ACK" before the write actually gets to the device. A read
-**    to the same device (or typically the bus adapter above it) will
-**    force in-flight write transaction(s) out to the targeted device
-**    before the read can complete.
-**
-** 2) The Programmed I/O (PIO) data may not always be strongly ordered with
-**    respect to DMA on all platforms. Ie PIO data can reach the processor
-**    before in-flight DMA reaches memory. Since most SMP PA platforms
-**    are I/O coherent, it generally doesn't matter...but sometimes
-**    it does.
-**
-** I've helped device driver writers debug both types of problems.
-*/
-struct pci_port_ops {
-	  u8 (*inb)  (struct pci_hba_data *hba, u16 port);
-	 u16 (*inw)  (struct pci_hba_data *hba, u16 port);
-	 u32 (*inl)  (struct pci_hba_data *hba, u16 port);
-	void (*outb) (struct pci_hba_data *hba, u16 port,  u8 data);
-	void (*outw) (struct pci_hba_data *hba, u16 port, u16 data);
-	void (*outl) (struct pci_hba_data *hba, u16 port, u32 data);
-};
-
-
-struct pci_bios_ops {
-	void (*init)(void);
-	void (*fixup_bus)(struct pci_bus *bus);
-};
-
-/* pci_unmap_{single,page} is not a nop, thus... */
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
-	dma_addr_t ADDR_NAME;
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
-	__u32 LEN_NAME;
-#define pci_unmap_addr(PTR, ADDR_NAME)			\
-	((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)		\
-	(((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME)			\
-	((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
-	(((PTR)->LEN_NAME) = (VAL))
-
-/*
-** Stuff declared in arch/parisc/kernel/pci.c
-*/
-extern struct pci_port_ops *pci_port;
-extern struct pci_bios_ops *pci_bios;
-
-#ifdef CONFIG_PCI
-extern void pcibios_register_hba(struct pci_hba_data *);
-extern void pcibios_set_master(struct pci_dev *);
-#else
-static inline void pcibios_register_hba(struct pci_hba_data *x)
-{
-}
-#endif
-
-/*
- * pcibios_assign_all_busses() is used in drivers/pci/pci.c:pci_do_scan_bus()
- *   0 == check if bridge is numbered before re-numbering.
- *   1 == pci_do_scan_bus() should automatically number all PCI-PCI bridges.
- *
- *   We *should* set this to zero for "legacy" platforms and one
- *   for PAT platforms.
- *
- *   But legacy platforms also need to renumber the busses below a Host
- *   Bus controller.  Adding a 4-port Tulip card on the first PCI root
- *   bus of a C200 resulted in the secondary bus being numbered as 1.
- *   The second PCI host bus controller's root bus had already been
- *   assigned bus number 1 by firmware and sysfs complained.
- *
- *   Firmware isn't doing anything wrong here since each controller
- *   is its own PCI domain.  It's simpler and easier for us to renumber
- *   the busses rather than treat each Dino as a separate PCI domain.
- *   Eventually, we may want to introduce PCI domains for Superdome or
- *   rp7420/8420 boxes and then revisit this issue.
- */
-#define pcibios_assign_all_busses()     (1)
-#define pcibios_scan_all_fns(a, b)	(0)
-
-#define PCIBIOS_MIN_IO          0x10
-#define PCIBIOS_MIN_MEM         0x1000 /* NBPG - but pci/setup-res.c dies */
-
-/* export the pci_ DMA API in terms of the dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
-					enum pci_dma_burst_strategy *strat,
-					unsigned long *strategy_parameter)
-{
-	unsigned long cacheline_size;
-	u8 byte;
-
-	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
-	if (byte == 0)
-		cacheline_size = 1024;
-	else
-		cacheline_size = (int) byte * 4;
-
-	*strat = PCI_DMA_BURST_MULTIPLE;
-	*strategy_parameter = cacheline_size;
-}
-#endif
-
-extern void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			 struct resource *res);
-
-extern void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			struct pci_bus_region *region);
-
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't need to penalize isa irq's */
-}
-
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
-	return channel ? 15 : 14;
-}
-
-#endif /* __ASM_PARISC_PCI_H */
diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h
deleted file mode 100644
index 46b75f9cce51..000000000000
--- a/include/asm-parisc/pdc.h
+++ /dev/null
@@ -1,760 +0,0 @@
-#ifndef _PARISC_PDC_H
-#define _PARISC_PDC_H
-
-/*
- *	PDC return values ...
- *	All PDC calls return a subset of these errors. 
- */
-
-#define PDC_WARN		  3	/* Call completed with a warning */
-#define PDC_REQ_ERR_1		  2	/* See above			 */
-#define PDC_REQ_ERR_0		  1	/* Call would generate a requestor error */
-#define PDC_OK			  0	/* Call completed successfully	*/
-#define PDC_BAD_PROC		 -1	/* Called non-existent procedure*/
-#define PDC_BAD_OPTION		 -2	/* Called with non-existent option */
-#define PDC_ERROR		 -3	/* Call could not complete without an error */
-#define PDC_NE_MOD		 -5	/* Module not found		*/
-#define PDC_NE_CELL_MOD		 -7	/* Cell module not found	*/
-#define PDC_INVALID_ARG		-10	/* Called with an invalid argument */
-#define PDC_BUS_POW_WARN	-12	/* Call could not complete in allowed power budget */
-#define PDC_NOT_NARROW		-17	/* Narrow mode not supported	*/
-
-/*
- *	PDC entry points...
- */
-
-#define PDC_POW_FAIL	1		/* perform a power-fail		*/
-#define PDC_POW_FAIL_PREPARE	0	/* prepare for powerfail	*/
-
-#define PDC_CHASSIS	2		/* PDC-chassis functions	*/
-#define PDC_CHASSIS_DISP	0	/* update chassis display	*/
-#define PDC_CHASSIS_WARN	1	/* return chassis warnings	*/
-#define PDC_CHASSIS_DISPWARN	2	/* update&return chassis status */
-#define PDC_RETURN_CHASSIS_INFO 128	/* HVERSION dependent: return chassis LED/LCD info  */
-
-#define PDC_PIM         3               /* Get PIM data                 */
-#define PDC_PIM_HPMC            0       /* Transfer HPMC data           */
-#define PDC_PIM_RETURN_SIZE     1       /* Get Max buffer needed for PIM*/
-#define PDC_PIM_LPMC            2       /* Transfer HPMC data           */
-#define PDC_PIM_SOFT_BOOT       3       /* Transfer Soft Boot data      */
-#define PDC_PIM_TOC             4       /* Transfer TOC data            */
-
-#define PDC_MODEL	4		/* PDC model information call	*/
-#define PDC_MODEL_INFO		0	/* returns information 		*/
-#define PDC_MODEL_BOOTID	1	/* set the BOOT_ID		*/
-#define PDC_MODEL_VERSIONS	2	/* returns cpu-internal versions*/
-#define PDC_MODEL_SYSMODEL	3	/* return system model info	*/
-#define PDC_MODEL_ENSPEC	4	/* enable specific option	*/
-#define PDC_MODEL_DISPEC	5	/* disable specific option	*/
-#define PDC_MODEL_CPU_ID	6	/* returns cpu-id (only newer machines!) */
-#define PDC_MODEL_CAPABILITIES	7	/* returns OS32/OS64-flags	*/
-/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */
-#define  PDC_MODEL_IOPDIR_FDC		(1 << 2)
-#define  PDC_MODEL_NVA_MASK		(3 << 4)
-#define  PDC_MODEL_NVA_SUPPORTED	(0 << 4)
-#define  PDC_MODEL_NVA_SLOW		(1 << 4)
-#define  PDC_MODEL_NVA_UNSUPPORTED	(3 << 4)
-#define PDC_MODEL_GET_BOOT__OP	8	/* returns boot test options	*/
-#define PDC_MODEL_SET_BOOT__OP	9	/* set boot test options	*/
-
-#define PA89_INSTRUCTION_SET	0x4	/* capatibilies returned	*/
-#define PA90_INSTRUCTION_SET	0x8
-
-#define PDC_CACHE	5		/* return/set cache (& TLB) info*/
-#define PDC_CACHE_INFO		0	/* returns information 		*/
-#define PDC_CACHE_SET_COH	1	/* set coherence state		*/
-#define PDC_CACHE_RET_SPID	2	/* returns space-ID bits	*/
-
-#define PDC_HPA		6		/* return HPA of processor	*/
-#define PDC_HPA_PROCESSOR	0
-#define PDC_HPA_MODULES		1
-
-#define PDC_COPROC	7		/* Co-Processor (usually FP unit(s)) */
-#define PDC_COPROC_CFG		0	/* Co-Processor Cfg (FP unit(s) enabled?) */
-
-#define PDC_IODC	8		/* talk to IODC			*/
-#define PDC_IODC_READ		0	/* read IODC entry point	*/
-/*      PDC_IODC_RI_			 * INDEX parameter of PDC_IODC_READ */
-#define PDC_IODC_RI_DATA_BYTES	0	/* IODC Data Bytes		*/
-/*				1, 2	   obsolete - HVERSION dependent*/
-#define PDC_IODC_RI_INIT	3	/* Initialize module		*/
-#define PDC_IODC_RI_IO		4	/* Module input/output		*/
-#define PDC_IODC_RI_SPA		5	/* Module input/output		*/
-#define PDC_IODC_RI_CONFIG	6	/* Module input/output		*/
-/*				7	  obsolete - HVERSION dependent */
-#define PDC_IODC_RI_TEST	8	/* Module input/output		*/
-#define PDC_IODC_RI_TLB		9	/* Module input/output		*/
-#define PDC_IODC_NINIT		2	/* non-destructive init		*/
-#define PDC_IODC_DINIT		3	/* destructive init		*/
-#define PDC_IODC_MEMERR		4	/* check for memory errors	*/
-#define PDC_IODC_INDEX_DATA	0	/* get first 16 bytes from mod IODC */
-#define PDC_IODC_BUS_ERROR	-4	/* bus error return value	*/
-#define PDC_IODC_INVALID_INDEX	-5	/* invalid index return value	*/
-#define PDC_IODC_COUNT		-6	/* count is too small		*/
-
-#define PDC_TOD		9		/* time-of-day clock (TOD)	*/
-#define PDC_TOD_READ		0	/* read TOD			*/
-#define PDC_TOD_WRITE		1	/* write TOD			*/
-
-
-#define PDC_STABLE	10		/* stable storage (sprockets)	*/
-#define PDC_STABLE_READ		0
-#define PDC_STABLE_WRITE	1
-#define PDC_STABLE_RETURN_SIZE	2
-#define PDC_STABLE_VERIFY_CONTENTS 3
-#define PDC_STABLE_INITIALIZE	4
-
-#define PDC_NVOLATILE	11		/* often not implemented	*/
-
-#define PDC_ADD_VALID	12		/* Memory validation PDC call	*/
-#define PDC_ADD_VALID_VERIFY	0	/* Make PDC_ADD_VALID verify region */
-
-#define PDC_INSTR	15		/* get instr to invoke PDCE_CHECK() */
-
-#define PDC_PROC	16		/* (sprockets)			*/
-
-#define PDC_CONFIG	16		/* (sprockets)			*/
-#define PDC_CONFIG_DECONFIG	0
-#define PDC_CONFIG_DRECONFIG	1
-#define PDC_CONFIG_DRETURN_CONFIG 2
-
-#define PDC_BLOCK_TLB	18		/* manage hardware block-TLB	*/
-#define PDC_BTLB_INFO		0	/* returns parameter 		*/
-#define PDC_BTLB_INSERT		1	/* insert BTLB entry		*/
-#define PDC_BTLB_PURGE		2	/* purge BTLB entries 		*/
-#define PDC_BTLB_PURGE_ALL	3	/* purge all BTLB entries 	*/
-
-#define PDC_TLB		19		/* manage hardware TLB miss handling */
-#define PDC_TLB_INFO		0	/* returns parameter 		*/
-#define PDC_TLB_SETUP		1	/* set up miss handling 	*/
-
-#define PDC_MEM		20		/* Manage memory		*/
-#define PDC_MEM_MEMINFO		0
-#define PDC_MEM_ADD_PAGE	1
-#define PDC_MEM_CLEAR_PDT	2
-#define PDC_MEM_READ_PDT	3
-#define PDC_MEM_RESET_CLEAR	4
-#define PDC_MEM_GOODMEM		5
-#define PDC_MEM_TABLE		128	/* Non contig mem map (sprockets) */
-#define PDC_MEM_RETURN_ADDRESS_TABLE	PDC_MEM_TABLE
-#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES_SIZE	131
-#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES	132
-#define PDC_MEM_GET_PHYSICAL_LOCATION_FROM_MEMORY_ADDRESS 133
-
-#define PDC_MEM_RET_SBE_REPLACED	5	/* PDC_MEM return values */
-#define PDC_MEM_RET_DUPLICATE_ENTRY	4
-#define PDC_MEM_RET_BUF_SIZE_SMALL	1
-#define PDC_MEM_RET_PDT_FULL		-11
-#define PDC_MEM_RET_INVALID_PHYSICAL_LOCATION ~0ULL
-
-#define PDC_PSW		21		/* Get/Set default System Mask  */
-#define PDC_PSW_MASK		0	/* Return mask                  */
-#define PDC_PSW_GET_DEFAULTS	1	/* Return defaults              */
-#define PDC_PSW_SET_DEFAULTS	2	/* Set default                  */
-#define PDC_PSW_ENDIAN_BIT	1	/* set for big endian           */
-#define PDC_PSW_WIDE_BIT	2	/* set for wide mode            */ 
-
-#define PDC_SYSTEM_MAP	22		/* find system modules		*/
-#define PDC_FIND_MODULE 	0
-#define PDC_FIND_ADDRESS	1
-#define PDC_TRANSLATE_PATH	2
-
-#define PDC_SOFT_POWER	23		/* soft power switch		*/
-#define PDC_SOFT_POWER_INFO	0	/* return info about the soft power switch */
-#define PDC_SOFT_POWER_ENABLE	1	/* enable/disable soft power switch */
-
-
-/* HVERSION dependent */
-
-/* The PDC_MEM_MAP calls */
-#define PDC_MEM_MAP	128		/* on s700: return page info	*/
-#define PDC_MEM_MAP_HPA		0	/* returns hpa of a module	*/
-
-#define PDC_EEPROM	129		/* EEPROM access		*/
-#define PDC_EEPROM_READ_WORD	0
-#define PDC_EEPROM_WRITE_WORD	1
-#define PDC_EEPROM_READ_BYTE	2
-#define PDC_EEPROM_WRITE_BYTE	3
-#define PDC_EEPROM_EEPROM_PASSWORD -1000
-
-#define PDC_NVM		130		/* NVM (non-volatile memory) access */
-#define PDC_NVM_READ_WORD	0
-#define PDC_NVM_WRITE_WORD	1
-#define PDC_NVM_READ_BYTE	2
-#define PDC_NVM_WRITE_BYTE	3
-
-#define PDC_SEED_ERROR	132		/* (sprockets)			*/
-
-#define PDC_IO		135		/* log error info, reset IO system */
-#define PDC_IO_READ_AND_CLEAR_ERRORS	0
-#define PDC_IO_RESET			1
-#define PDC_IO_RESET_DEVICES		2
-/* sets bits 6&7 (little endian) of the HcControl Register */
-#define PDC_IO_USB_SUSPEND	0xC000000000000000
-#define PDC_IO_EEPROM_IO_ERR_TABLE_FULL	-5	/* return value */
-#define PDC_IO_NO_SUSPEND		-6	/* return value */
-
-#define PDC_BROADCAST_RESET 136		/* reset all processors		*/
-#define PDC_DO_RESET		0	/* option: perform a broadcast reset */
-#define PDC_DO_FIRM_TEST_RESET	1	/* Do broadcast reset with bitmap */
-#define PDC_BR_RECONFIGURATION	2	/* reset w/reconfiguration	*/
-#define PDC_FIRM_TEST_MAGIC	0xab9ec36fUL    /* for this reboot only	*/
-
-#define PDC_LAN_STATION_ID 138		/* Hversion dependent mechanism for */
-#define PDC_LAN_STATION_ID_READ	0	/* getting the lan station address  */
-
-#define	PDC_LAN_STATION_ID_SIZE	6
-
-#define PDC_CHECK_RANGES 139		/* (sprockets)			*/
-
-#define PDC_NV_SECTIONS	141		/* (sprockets)			*/
-
-#define PDC_PERFORMANCE	142		/* performance monitoring	*/
-
-#define PDC_SYSTEM_INFO	143		/* system information		*/
-#define PDC_SYSINFO_RETURN_INFO_SIZE	0
-#define PDC_SYSINFO_RRETURN_SYS_INFO	1
-#define PDC_SYSINFO_RRETURN_ERRORS	2
-#define PDC_SYSINFO_RRETURN_WARNINGS	3
-#define PDC_SYSINFO_RETURN_REVISIONS	4
-#define PDC_SYSINFO_RRETURN_DIAGNOSE	5
-#define PDC_SYSINFO_RRETURN_HV_DIAGNOSE	1005
-
-#define PDC_RDR		144		/* (sprockets)			*/
-#define PDC_RDR_READ_BUFFER	0
-#define PDC_RDR_READ_SINGLE	1
-#define PDC_RDR_WRITE_SINGLE	2
-
-#define PDC_INTRIGUE	145 		/* (sprockets)			*/
-#define PDC_INTRIGUE_WRITE_BUFFER 	 0
-#define PDC_INTRIGUE_GET_SCRATCH_BUFSIZE 1
-#define PDC_INTRIGUE_START_CPU_COUNTERS	 2
-#define PDC_INTRIGUE_STOP_CPU_COUNTERS	 3
-
-#define PDC_STI		146 		/* STI access			*/
-/* same as PDC_PCI_XXX values (see below) */
-
-/* Legacy PDC definitions for same stuff */
-#define PDC_PCI_INDEX	147
-#define PDC_PCI_INTERFACE_INFO		0
-#define PDC_PCI_SLOT_INFO		1
-#define PDC_PCI_INFLIGHT_BYTES		2
-#define PDC_PCI_READ_CONFIG		3
-#define PDC_PCI_WRITE_CONFIG		4
-#define PDC_PCI_READ_PCI_IO		5
-#define PDC_PCI_WRITE_PCI_IO		6
-#define PDC_PCI_READ_CONFIG_DELAY	7
-#define PDC_PCI_UPDATE_CONFIG_DELAY	8
-#define PDC_PCI_PCI_PATH_TO_PCI_HPA	9
-#define PDC_PCI_PCI_HPA_TO_PCI_PATH	10
-#define PDC_PCI_PCI_PATH_TO_PCI_BUS	11
-#define PDC_PCI_PCI_RESERVED		12
-#define PDC_PCI_PCI_INT_ROUTE_SIZE	13
-#define PDC_PCI_GET_INT_TBL_SIZE	PDC_PCI_PCI_INT_ROUTE_SIZE
-#define PDC_PCI_PCI_INT_ROUTE		14
-#define PDC_PCI_GET_INT_TBL		PDC_PCI_PCI_INT_ROUTE 
-#define PDC_PCI_READ_MON_TYPE		15
-#define PDC_PCI_WRITE_MON_TYPE		16
-
-
-/* Get SCSI Interface Card info:  SDTR, SCSI ID, mode (SE vs LVD) */
-#define PDC_INITIATOR	163
-#define PDC_GET_INITIATOR	0
-#define PDC_SET_INITIATOR	1
-#define PDC_DELETE_INITIATOR	2
-#define PDC_RETURN_TABLE_SIZE	3
-#define PDC_RETURN_TABLE	4
-
-#define PDC_LINK	165 		/* (sprockets)			*/
-#define PDC_LINK_PCI_ENTRY_POINTS	0  /* list (Arg1) = 0 */
-#define PDC_LINK_USB_ENTRY_POINTS	1  /* list (Arg1) = 1 */
-
-/* cl_class
- * page 3-33 of IO-Firmware ARS
- * IODC ENTRY_INIT(Search first) RET[1]
- */
-#define	CL_NULL		0	/* invalid */
-#define	CL_RANDOM	1	/* random access (as disk) */
-#define	CL_SEQU		2	/* sequential access (as tape) */
-#define	CL_DUPLEX	7	/* full-duplex point-to-point (RS-232, Net) */
-#define	CL_KEYBD	8	/* half-duplex console (HIL Keyboard) */
-#define	CL_DISPL	9	/* half-duplex console (display) */
-#define	CL_FC		10	/* FiberChannel access media */
-
-/* IODC ENTRY_INIT() */
-#define ENTRY_INIT_SRCH_FRST	2
-#define ENTRY_INIT_SRCH_NEXT	3
-#define ENTRY_INIT_MOD_DEV	4
-#define ENTRY_INIT_DEV		5
-#define ENTRY_INIT_MOD		6
-#define ENTRY_INIT_MSG		9
-
-/* IODC ENTRY_IO() */
-#define ENTRY_IO_BOOTIN		0
-#define ENTRY_IO_BOOTOUT	1
-#define ENTRY_IO_CIN		2
-#define ENTRY_IO_COUT		3
-#define ENTRY_IO_CLOSE		4
-#define ENTRY_IO_GETMSG		9
-#define ENTRY_IO_BBLOCK_IN	16
-#define ENTRY_IO_BBLOCK_OUT	17
-
-/* IODC ENTRY_SPA() */
-
-/* IODC ENTRY_CONFIG() */
-
-/* IODC ENTRY_TEST() */
-
-/* IODC ENTRY_TLB() */
-
-/* constants for OS (NVM...) */
-#define OS_ID_NONE		0	/* Undefined OS ID	*/
-#define OS_ID_HPUX		1	/* HP-UX OS		*/
-#define OS_ID_MPEXL		2	/* MPE XL OS		*/
-#define OS_ID_OSF		3	/* OSF OS		*/
-#define OS_ID_HPRT		4	/* HP-RT OS		*/
-#define OS_ID_NOVEL		5	/* NOVELL OS		*/
-#define OS_ID_LINUX		6	/* Linux		*/
-
-
-/* constants for PDC_CHASSIS */
-#define OSTAT_OFF		0
-#define OSTAT_FLT		1 
-#define OSTAT_TEST		2
-#define OSTAT_INIT		3
-#define OSTAT_SHUT		4
-#define OSTAT_WARN		5
-#define OSTAT_RUN		6
-#define OSTAT_ON		7
-
-/* Page Zero constant offsets used by the HPMC handler */
-#define BOOT_CONSOLE_HPA_OFFSET  0x3c0
-#define BOOT_CONSOLE_SPA_OFFSET  0x3c4
-#define BOOT_CONSOLE_PATH_OFFSET 0x3a8
-
-/* size of the pdc_result buffer for firmware.c */
-#define NUM_PDC_RESULT	32
-
-#if !defined(__ASSEMBLY__)
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-
-extern int pdc_type;
-
-/* Values for pdc_type */
-#define PDC_TYPE_ILLEGAL	-1
-#define PDC_TYPE_PAT		 0 /* 64-bit PAT-PDC */
-#define PDC_TYPE_SYSTEM_MAP	 1 /* 32-bit, but supports PDC_SYSTEM_MAP */
-#define PDC_TYPE_SNAKE		 2 /* Doesn't support SYSTEM_MAP */
-
-struct pdc_chassis_info {       /* for PDC_CHASSIS_INFO */
-	unsigned long actcnt;   /* actual number of bytes returned */
-	unsigned long maxcnt;   /* maximum number of bytes that could be returned */
-};
-
-struct pdc_coproc_cfg {         /* for PDC_COPROC_CFG */
-        unsigned long ccr_functional;
-        unsigned long ccr_present;
-        unsigned long revision;
-        unsigned long model;
-};
-
-struct pdc_model {		/* for PDC_MODEL */
-	unsigned long hversion;
-	unsigned long sversion;
-	unsigned long hw_id;
-	unsigned long boot_id;
-	unsigned long sw_id;
-	unsigned long sw_cap;
-	unsigned long arch_rev;
-	unsigned long pot_key;
-	unsigned long curr_key;
-};
-
-struct pdc_cache_cf {		/* for PDC_CACHE  (I/D-caches) */
-    unsigned long
-#ifdef CONFIG_64BIT
-		cc_padW:32,
-#endif
-		cc_alias: 4,	/* alias boundaries for virtual addresses   */
-		cc_block: 4,	/* to determine most efficient stride */
-		cc_line	: 3,	/* maximum amount written back as a result of store (multiple of 16 bytes) */
-		cc_shift: 2,	/* how much to shift cc_block left */
-		cc_wt	: 1,	/* 0 = WT-Dcache, 1 = WB-Dcache */
-		cc_sh	: 2,	/* 0 = separate I/D-cache, else shared I/D-cache */
-		cc_cst  : 3,	/* 0 = incoherent D-cache, 1=coherent D-cache */
-		cc_pad1 : 10,	/* reserved */
-		cc_hv   : 3;	/* hversion dependent */
-};
-
-struct pdc_tlb_cf {		/* for PDC_CACHE (I/D-TLB's) */
-    unsigned long tc_pad0:12,	/* reserved */
-#ifdef CONFIG_64BIT
-		tc_padW:32,
-#endif
-		tc_sh	: 2,	/* 0 = separate I/D-TLB, else shared I/D-TLB */
-		tc_hv   : 1,	/* HV */
-		tc_page : 1,	/* 0 = 2K page-size-machine, 1 = 4k page size */
-		tc_cst  : 3,	/* 0 = incoherent operations, else coherent operations */
-		tc_aid  : 5,	/* ITLB: width of access ids of processor (encoded!) */
-		tc_pad1 : 8;	/* ITLB: width of space-registers (encoded) */
-};
-
-struct pdc_cache_info {		/* main-PDC_CACHE-structure (caches & TLB's) */
-	/* I-cache */
-	unsigned long	ic_size;	/* size in bytes */
-	struct pdc_cache_cf ic_conf;	/* configuration */
-	unsigned long	ic_base;	/* base-addr */
-	unsigned long	ic_stride;
-	unsigned long	ic_count;
-	unsigned long	ic_loop;
-	/* D-cache */
-	unsigned long	dc_size;	/* size in bytes */
-	struct pdc_cache_cf dc_conf;	/* configuration */
-	unsigned long	dc_base;	/* base-addr */
-	unsigned long	dc_stride;
-	unsigned long	dc_count;
-	unsigned long	dc_loop;
-	/* Instruction-TLB */
-	unsigned long	it_size;	/* number of entries in I-TLB */
-	struct pdc_tlb_cf it_conf;	/* I-TLB-configuration */
-	unsigned long	it_sp_base;
-	unsigned long	it_sp_stride;
-	unsigned long	it_sp_count;
-	unsigned long	it_off_base;
-	unsigned long	it_off_stride;
-	unsigned long	it_off_count;
-	unsigned long	it_loop;
-	/* data-TLB */
-	unsigned long	dt_size;	/* number of entries in D-TLB */
-	struct pdc_tlb_cf dt_conf;	/* D-TLB-configuration */
-	unsigned long	dt_sp_base;
-	unsigned long	dt_sp_stride;
-	unsigned long	dt_sp_count;
-	unsigned long	dt_off_base;
-	unsigned long	dt_off_stride;
-	unsigned long	dt_off_count;
-	unsigned long	dt_loop;
-};
-
-#if 0
-/* If you start using the next struct, you'll have to adjust it to
- * work with 64-bit firmware I think -PB
- */
-struct pdc_iodc {     /* PDC_IODC */
-	unsigned char   hversion_model;
-	unsigned char 	hversion;
-	unsigned char 	spa;
-	unsigned char 	type;
-	unsigned int	sversion_rev:4;
-	unsigned int	sversion_model:19;
-	unsigned int	sversion_opt:8;
-	unsigned char	rev;
-	unsigned char	dep;
-	unsigned char	features;
-	unsigned char	pad1;
-	unsigned int	checksum:16;
-	unsigned int	length:16;
-	unsigned int    pad[15];
-} __attribute__((aligned(8))) ;
-#endif
-
-#ifndef CONFIG_PA20
-/* no BLTBs in pa2.0 processors */
-struct pdc_btlb_info_range {
-	__u8 res00;
-	__u8 num_i;
-	__u8 num_d;
-	__u8 num_comb;
-};
-
-struct pdc_btlb_info {	/* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */
-	unsigned int min_size;	/* minimum size of BTLB in pages */
-	unsigned int max_size;	/* maximum size of BTLB in pages */
-	struct pdc_btlb_info_range fixed_range_info;
-	struct pdc_btlb_info_range variable_range_info;
-};
-
-#endif /* !CONFIG_PA20 */
-
-#ifdef CONFIG_64BIT
-struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */
-	unsigned long entries_returned;
-	unsigned long entries_total;
-};
-
-struct pdc_memory_table {       /* PDC_MEM/PDC_MEM_TABLE (arguments) */
-	unsigned long paddr;
-	unsigned int  pages;
-	unsigned int  reserved;
-};
-#endif /* CONFIG_64BIT */
-
-struct pdc_system_map_mod_info { /* PDC_SYSTEM_MAP/FIND_MODULE */
-	unsigned long mod_addr;
-	unsigned long mod_pgs;
-	unsigned long add_addrs;
-};
-
-struct pdc_system_map_addr_info { /* PDC_SYSTEM_MAP/FIND_ADDRESS */
-	unsigned long mod_addr;
-	unsigned long mod_pgs;
-};
-
-struct pdc_initiator { /* PDC_INITIATOR */
-	int host_id;
-	int factor;
-	int width;
-	int mode;
-};
-
-struct hardware_path {
-	char  flags;	/* see bit definitions below */
-	char  bc[6];	/* Bus Converter routing info to a specific */
-			/* I/O adaptor (< 0 means none, > 63 resvd) */
-	char  mod;	/* fixed field of specified module */
-};
-
-/*
- * Device path specifications used by PDC.
- */
-struct pdc_module_path {
-	struct hardware_path path;
-	unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */
-};
-
-#ifndef CONFIG_PA20
-/* Only used on some pre-PA2.0 boxes */
-struct pdc_memory_map {		/* PDC_MEMORY_MAP */
-	unsigned long hpa;	/* mod's register set address */
-	unsigned long more_pgs;	/* number of additional I/O pgs */
-};
-#endif
-
-struct pdc_tod {
-	unsigned long tod_sec; 
-	unsigned long tod_usec;
-};
-
-/* architected results from PDC_PIM/transfer hpmc on a PA1.1 machine */
-
-struct pdc_hpmc_pim_11 { /* PDC_PIM */
-	__u32 gr[32];
-	__u32 cr[32];
-	__u32 sr[8];
-	__u32 iasq_back;
-	__u32 iaoq_back;
-	__u32 check_type;
-	__u32 cpu_state;
-	__u32 rsvd1;
-	__u32 cache_check;
-	__u32 tlb_check;
-	__u32 bus_check;
-	__u32 assists_check;
-	__u32 rsvd2;
-	__u32 assist_state;
-	__u32 responder_addr;
-	__u32 requestor_addr;
-	__u32 path_info;
-	__u64 fr[32];
-};
-
-/*
- * architected results from PDC_PIM/transfer hpmc on a PA2.0 machine
- *
- * Note that PDC_PIM doesn't care whether or not wide mode was enabled
- * so the results are different on  PA1.1 vs. PA2.0 when in narrow mode.
- *
- * Note also that there are unarchitected results available, which
- * are hversion dependent. Do a "ser pim 0 hpmc" after rebooting, since
- * the firmware is probably the best way of printing hversion dependent
- * data.
- */
-
-struct pdc_hpmc_pim_20 { /* PDC_PIM */
-	__u64 gr[32];
-	__u64 cr[32];
-	__u64 sr[8];
-	__u64 iasq_back;
-	__u64 iaoq_back;
-	__u32 check_type;
-	__u32 cpu_state;
-	__u32 cache_check;
-	__u32 tlb_check;
-	__u32 bus_check;
-	__u32 assists_check;
-	__u32 assist_state;
-	__u32 path_info;
-	__u64 responder_addr;
-	__u64 requestor_addr;
-	__u64 fr[32];
-};
-
-void pdc_console_init(void);	/* in pdc_console.c */
-void pdc_console_restart(void);
-
-void setup_pdc(void);		/* in inventory.c */
-
-/* wrapper-functions from pdc.c */
-
-int pdc_add_valid(unsigned long address);
-int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len);
-int pdc_chassis_disp(unsigned long disp);
-int pdc_chassis_warn(unsigned long *warn);
-int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
-int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
-		  void *iodc_data, unsigned int iodc_data_size);
-int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
-			     struct pdc_module_path *mod_path, long mod_index);
-int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info,
-			      long mod_index, long addr_index);
-int pdc_model_info(struct pdc_model *model);
-int pdc_model_sysmodel(char *name);
-int pdc_model_cpuid(unsigned long *cpu_id);
-int pdc_model_versions(unsigned long *versions, int id);
-int pdc_model_capabilities(unsigned long *capabilities);
-int pdc_cache_info(struct pdc_cache_info *cache);
-int pdc_spaceid_bits(unsigned long *space_bits);
-#ifndef CONFIG_PA20
-int pdc_btlb_info(struct pdc_btlb_info *btlb);
-int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path);
-#endif /* !CONFIG_PA20 */
-int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa);
-
-int pdc_stable_read(unsigned long staddr, void *memaddr, unsigned long count);
-int pdc_stable_write(unsigned long staddr, void *memaddr, unsigned long count);
-int pdc_stable_get_size(unsigned long *size);
-int pdc_stable_verify_contents(void);
-int pdc_stable_initialize(void);
-
-int pdc_pci_irt_size(unsigned long *num_entries, unsigned long hpa);
-int pdc_pci_irt(unsigned long num_entries, unsigned long hpa, void *tbl);
-
-int pdc_get_initiator(struct hardware_path *, struct pdc_initiator *);
-int pdc_tod_read(struct pdc_tod *tod);
-int pdc_tod_set(unsigned long sec, unsigned long usec);
-
-#ifdef CONFIG_64BIT
-int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
-		struct pdc_memory_table *tbl, unsigned long entries);
-#endif
-
-void set_firmware_width(void);
-int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
-int pdc_do_reset(void);
-int pdc_soft_power_info(unsigned long *power_reg);
-int pdc_soft_power_button(int sw_control);
-void pdc_io_reset(void);
-void pdc_io_reset_devices(void);
-int pdc_iodc_getc(void);
-int pdc_iodc_print(const unsigned char *str, unsigned count);
-
-void pdc_emergency_unlock(void);
-int pdc_sti_call(unsigned long func, unsigned long flags,
-                 unsigned long inptr, unsigned long outputr,
-                 unsigned long glob_cfg);
-
-static inline char * os_id_to_string(u16 os_id) {
-	switch(os_id) {
-	case OS_ID_NONE:	return "No OS";
-	case OS_ID_HPUX:	return "HP-UX";
-	case OS_ID_MPEXL:	return "MPE-iX";
-	case OS_ID_OSF:		return "OSF";
-	case OS_ID_HPRT:	return "HP-RT";
-	case OS_ID_NOVEL:	return "Novell Netware";
-	case OS_ID_LINUX:	return "Linux";
-	default:	return "Unknown";
-	}
-}
-
-#endif /* __KERNEL__ */
-
-#define PAGE0   ((struct zeropage *)__PAGE_OFFSET)
-
-/* DEFINITION OF THE ZERO-PAGE (PAG0) */
-/* based on work by Jason Eckhardt (jason@equator.com) */
-
-/* flags of the device_path */
-#define	PF_AUTOBOOT	0x80
-#define	PF_AUTOSEARCH	0x40
-#define	PF_TIMER	0x0F
-
-struct device_path {		/* page 1-69 */
-	unsigned char flags;	/* flags see above! */
-	unsigned char bc[6];	/* bus converter routing info */
-	unsigned char mod;
-	unsigned int  layers[6];/* device-specific layer-info */
-} __attribute__((aligned(8))) ;
-
-struct pz_device {
-	struct	device_path dp;	/* see above */
-	/* struct	iomod *hpa; */
-	unsigned int hpa;	/* HPA base address */
-	/* char	*spa; */
-	unsigned int spa;	/* SPA base address */
-	/* int	(*iodc_io)(struct iomod*, ...); */
-	unsigned int iodc_io;	/* device entry point */
-	short	pad;		/* reserved */
-	unsigned short cl_class;/* see below */
-} __attribute__((aligned(8))) ;
-
-struct zeropage {
-	/* [0x000] initialize vectors (VEC) */
-	unsigned int	vec_special;		/* must be zero */
-	/* int	(*vec_pow_fail)(void);*/
-	unsigned int	vec_pow_fail; /* power failure handler */
-	/* int	(*vec_toc)(void); */
-	unsigned int	vec_toc;
-	unsigned int	vec_toclen;
-	/* int	(*vec_rendz)(void); */
-	unsigned int vec_rendz;
-	int	vec_pow_fail_flen;
-	int	vec_pad[10];		
-	
-	/* [0x040] reserved processor dependent */
-	int	pad0[112];
-
-	/* [0x200] reserved */
-	int	pad1[84];
-
-	/* [0x350] memory configuration (MC) */
-	int	memc_cont;		/* contiguous mem size (bytes) */
-	int	memc_phsize;		/* physical memory size */
-	int	memc_adsize;		/* additional mem size, bytes of SPA space used by PDC */
-	unsigned int mem_pdc_hi;	/* used for 64-bit */
-
-	/* [0x360] various parameters for the boot-CPU */
-	/* unsigned int *mem_booterr[8]; */
-	unsigned int mem_booterr[8];	/* ptr to boot errors */
-	unsigned int mem_free;		/* first location, where OS can be loaded */
-	/* struct iomod *mem_hpa; */
-	unsigned int mem_hpa;		/* HPA of the boot-CPU */
-	/* int (*mem_pdc)(int, ...); */
-	unsigned int mem_pdc;		/* PDC entry point */
-	unsigned int mem_10msec;	/* number of clock ticks in 10msec */
-
-	/* [0x390] initial memory module (IMM) */
-	/* struct iomod *imm_hpa; */
-	unsigned int imm_hpa;		/* HPA of the IMM */
-	int	imm_soft_boot;		/* 0 = was hard boot, 1 = was soft boot */
-	unsigned int	imm_spa_size;		/* SPA size of the IMM in bytes */
-	unsigned int	imm_max_mem;		/* bytes of mem in IMM */
-
-	/* [0x3A0] boot console, display device and keyboard */
-	struct pz_device mem_cons;	/* description of console device */
-	struct pz_device mem_boot;	/* description of boot device */
-	struct pz_device mem_kbd;	/* description of keyboard device */
-
-	/* [0x430] reserved */
-	int	pad430[116];
-
-	/* [0x600] processor dependent */
-	__u32	pad600[1];
-	__u32	proc_sti;		/* pointer to STI ROM */
-	__u32	pad608[126];
-};
-
-#endif /* !defined(__ASSEMBLY__) */
-
-#endif /* _PARISC_PDC_H */
diff --git a/include/asm-parisc/pdc_chassis.h b/include/asm-parisc/pdc_chassis.h
deleted file mode 100644
index a609273dc6bf..000000000000
--- a/include/asm-parisc/pdc_chassis.h
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- *	include/asm-parisc/pdc_chassis.h
- *
- *	Copyright (C) 2002 Laurent Canet <canetl@esiee.fr>
- *	Copyright (C) 2002 Thibaut Varene <varenet@parisc-linux.org>
- *
- *
- *      This program is free software; you can redistribute it and/or modify
- *      it under the terms of the GNU General Public License, version 2, as
- *      published by the Free Software Foundation.
- *      
- *      This program is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *      GNU General Public License for more details.
- *      
- *      You should have received a copy of the GNU General Public License
- *      along with this program; if not, write to the Free Software
- *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *      TODO:	- handle processor number on SMP systems (Reporting Entity ID)
- *      	- handle message ID
- *      	- handle timestamps
- */
- 
-
-#ifndef _PARISC_PDC_CHASSIS_H
-#define _PARISC_PDC_CHASSIS_H
-
-/*
- * ----------
- * Prototypes
- * ----------
- */
-
-int pdc_chassis_send_status(int message);
-void parisc_pdc_chassis_init(void);
-
-
-/*
- * -----------------
- * Direct call names
- * -----------------
- * They setup everything for you, the Log message and the corresponding LED state
- */
-
-#define PDC_CHASSIS_DIRECT_BSTART	0
-#define PDC_CHASSIS_DIRECT_BCOMPLETE	1
-#define PDC_CHASSIS_DIRECT_SHUTDOWN	2
-#define PDC_CHASSIS_DIRECT_PANIC	3
-#define PDC_CHASSIS_DIRECT_HPMC		4
-#define PDC_CHASSIS_DIRECT_LPMC		5
-#define PDC_CHASSIS_DIRECT_DUMP		6	/* not yet implemented */
-#define PDC_CHASSIS_DIRECT_OOPS		7	/* not yet implemented */
-
-
-/*
- * ------------
- * LEDs control
- * ------------
- * Set the three LEDs -- Run, Attn, and Fault.
- */
-
-/* Old PDC LED control */
-#define PDC_CHASSIS_DISP_DATA(v)	((unsigned long)(v) << 17)
-
-/* 
- * Available PDC PAT LED states
- */
-
-#define PDC_CHASSIS_LED_RUN_OFF		(0ULL << 4)
-#define PDC_CHASSIS_LED_RUN_FLASH	(1ULL << 4)
-#define PDC_CHASSIS_LED_RUN_ON		(2ULL << 4)
-#define PDC_CHASSIS_LED_RUN_NC		(3ULL << 4)
-#define PDC_CHASSIS_LED_ATTN_OFF	(0ULL << 6)
-#define PDC_CHASSIS_LED_ATTN_FLASH	(1ULL << 6)
-#define PDC_CHASSIS_LED_ATTN_NC		(3ULL << 6)	/* ATTN ON is invalid */
-#define PDC_CHASSIS_LED_FAULT_OFF	(0ULL << 8)
-#define PDC_CHASSIS_LED_FAULT_FLASH	(1ULL << 8)
-#define PDC_CHASSIS_LED_FAULT_ON	(2ULL << 8)
-#define PDC_CHASSIS_LED_FAULT_NC	(3ULL << 8)
-#define PDC_CHASSIS_LED_VALID		(1ULL << 10)
-
-/* 
- * Valid PDC PAT LED states combinations
- */
-
-/* System running normally */
-#define PDC_CHASSIS_LSTATE_RUN_NORMAL	(PDC_CHASSIS_LED_RUN_ON		| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* System crashed and rebooted itself successfully */
-#define PDC_CHASSIS_LSTATE_RUN_CRASHREC	(PDC_CHASSIS_LED_RUN_ON		| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_FLASH	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* There was a system interruption that did not take the system down */
-#define PDC_CHASSIS_LSTATE_RUN_SYSINT	(PDC_CHASSIS_LED_RUN_ON		| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* System running and unexpected reboot or non-critical error detected */
-#define PDC_CHASSIS_LSTATE_RUN_NCRIT	(PDC_CHASSIS_LED_RUN_ON		| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_FLASH	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Executing non-OS code */
-#define PDC_CHASSIS_LSTATE_NONOS	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Boot failed - Executing non-OS code */
-#define PDC_CHASSIS_LSTATE_NONOS_BFAIL	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_ON	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Unexpected reboot occurred - Executing non-OS code */
-#define PDC_CHASSIS_LSTATE_NONOS_UNEXP	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_FLASH	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Executing non-OS code - Non-critical error detected */
-#define PDC_CHASSIS_LSTATE_NONOS_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Boot failed - Executing non-OS code - Non-critical error detected */
-#define PDC_CHASSIS_LSTATE_BFAIL_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_ON	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Unexpected reboot/recovering - Executing non-OS code - Non-critical error detected */
-#define PDC_CHASSIS_LSTATE_UNEXP_NCRIT	(PDC_CHASSIS_LED_RUN_FLASH	| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_FLASH	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Cannot execute PDC */
-#define PDC_CHASSIS_LSTATE_CANNOT_PDC	(PDC_CHASSIS_LED_RUN_OFF	| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Boot failed - OS not up - PDC has detected a failure that prevents boot */
-#define PDC_CHASSIS_LSTATE_FATAL_BFAIL	(PDC_CHASSIS_LED_RUN_OFF	| \
-					 PDC_CHASSIS_LED_ATTN_OFF	| \
-					 PDC_CHASSIS_LED_FAULT_ON	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* No code running - Non-critical error detected (double fault situation) */
-#define PDC_CHASSIS_LSTATE_NOCODE_NCRIT	(PDC_CHASSIS_LED_RUN_OFF	| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_OFF	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* Boot failed - OS not up - Fatal failure detected - Non-critical error detected */
-#define PDC_CHASSIS_LSTATE_FATAL_NCRIT	(PDC_CHASSIS_LED_RUN_OFF	| \
-					 PDC_CHASSIS_LED_ATTN_FLASH	| \
-					 PDC_CHASSIS_LED_FAULT_ON	| \
-					 PDC_CHASSIS_LED_VALID		)
-/* All other states are invalid */
-
-
-/*
- * --------------
- * PDC Log events
- * --------------
- * Here follows bits needed to fill up the log event sent to PDC_CHASSIS
- * The log message contains: Alert level, Source, Source detail,
- * Source ID, Problem detail, Caller activity, Activity status, 
- * Caller subactivity, Reporting entity type, Reporting entity ID,
- * Data type, Unique message ID and EOM. 
- */
-
-/* Alert level */
-#define PDC_CHASSIS_ALERT_FORWARD	(0ULL << 36)	/* no failure detected */
-#define PDC_CHASSIS_ALERT_SERPROC	(1ULL << 36)	/* service proc - no failure */
-#define PDC_CHASSIS_ALERT_NURGENT	(2ULL << 36)	/* non-urgent operator attn */
-#define PDC_CHASSIS_ALERT_BLOCKED	(3ULL << 36)	/* system blocked */
-#define PDC_CHASSIS_ALERT_CONF_CHG	(4ULL << 36)	/* unexpected configuration change */
-#define PDC_CHASSIS_ALERT_ENV_PB	(5ULL << 36)	/* boot possible, environmental pb */
-#define PDC_CHASSIS_ALERT_PENDING	(6ULL << 36)	/* boot possible, pending failure */
-#define PDC_CHASSIS_ALERT_PERF_IMP	(8ULL << 36)	/* boot possible, performance impaired */
-#define PDC_CHASSIS_ALERT_FUNC_IMP	(10ULL << 36)	/* boot possible, functionality impaired */
-#define PDC_CHASSIS_ALERT_SOFT_FAIL	(12ULL << 36)	/* software failure */
-#define PDC_CHASSIS_ALERT_HANG		(13ULL << 36)	/* system hang */
-#define PDC_CHASSIS_ALERT_ENV_FATAL	(14ULL << 36)	/* fatal power or environmental pb */
-#define PDC_CHASSIS_ALERT_HW_FATAL	(15ULL << 36)	/* fatal hardware problem */
-
-/* Source */
-#define PDC_CHASSIS_SRC_NONE		(0ULL << 28)	/* unknown, no source stated */
-#define PDC_CHASSIS_SRC_PROC		(1ULL << 28)	/* processor */
-/* For later use ? */
-#define PDC_CHASSIS_SRC_PROC_CACHE	(2ULL << 28)	/* processor cache*/
-#define PDC_CHASSIS_SRC_PDH		(3ULL << 28)	/* processor dependent hardware */
-#define PDC_CHASSIS_SRC_PWR		(4ULL << 28)	/* power */
-#define PDC_CHASSIS_SRC_FAB		(5ULL << 28)	/* fabric connector */
-#define PDC_CHASSIS_SRC_PLATi		(6ULL << 28)	/* platform */
-#define PDC_CHASSIS_SRC_MEM		(7ULL << 28)	/* memory */
-#define PDC_CHASSIS_SRC_IO		(8ULL << 28)	/* I/O */
-#define PDC_CHASSIS_SRC_CELL		(9ULL << 28)	/* cell */
-#define PDC_CHASSIS_SRC_PD		(10ULL << 28)	/* protected domain */
-
-/* Source detail field */
-#define PDC_CHASSIS_SRC_D_PROC		(1ULL << 24)	/* processor general */
-
-/* Source ID - platform dependent */
-#define PDC_CHASSIS_SRC_ID_UNSPEC	(0ULL << 16)
-
-/* Problem detail - problem source dependent */
-#define PDC_CHASSIS_PB_D_PROC_NONE	(0ULL << 32)	/* no problem detail */
-#define PDC_CHASSIS_PB_D_PROC_TIMEOUT	(4ULL << 32)	/* timeout */
-
-/* Caller activity */
-#define PDC_CHASSIS_CALL_ACT_HPUX_BL	(7ULL << 12)	/* Boot Loader */
-#define PDC_CHASSIS_CALL_ACT_HPUX_PD	(8ULL << 12)	/* SAL_PD activities */
-#define PDC_CHASSIS_CALL_ACT_HPUX_EVENT	(9ULL << 12)	/* SAL_EVENTS activities */
-#define PDC_CHASSIS_CALL_ACT_HPUX_IO	(10ULL << 12)	/* SAL_IO activities */
-#define PDC_CHASSIS_CALL_ACT_HPUX_PANIC	(11ULL << 12)	/* System panic */
-#define PDC_CHASSIS_CALL_ACT_HPUX_INIT	(12ULL << 12)	/* System initialization */
-#define PDC_CHASSIS_CALL_ACT_HPUX_SHUT	(13ULL << 12)	/* System shutdown */
-#define PDC_CHASSIS_CALL_ACT_HPUX_WARN	(14ULL << 12)	/* System warning */
-#define PDC_CHASSIS_CALL_ACT_HPUX_DU	(15ULL << 12)	/* Display_Activity() update */
-
-/* Activity status - implementation dependent */
-#define PDC_CHASSIS_ACT_STATUS_UNSPEC	(0ULL << 0)
-
-/* Caller subactivity - implementation dependent */
-/* FIXME: other subactivities ? */
-#define PDC_CHASSIS_CALL_SACT_UNSPEC	(0ULL << 4)	/* implementation dependent */
-
-/* Reporting entity type */
-#define PDC_CHASSIS_RET_GENERICOS	(12ULL << 52)	/* generic OSes */
-#define PDC_CHASSIS_RET_IA64_NT		(13ULL << 52)	/* IA-64 NT */
-#define PDC_CHASSIS_RET_HPUX		(14ULL << 52)	/* HP-UX */
-#define PDC_CHASSIS_RET_DIAG		(15ULL << 52)	/* offline diagnostics & utilities */
-
-/* Reporting entity ID */
-#define PDC_CHASSIS_REID_UNSPEC		(0ULL << 44)
-
-/* Data type */
-#define PDC_CHASSIS_DT_NONE		(0ULL << 59)	/* data field unused */
-/* For later use ? Do we need these ? */
-#define PDC_CHASSIS_DT_PHYS_ADDR	(1ULL << 59)	/* physical address */
-#define PDC_CHASSIS_DT_DATA_EXPECT	(2ULL << 59)	/* expected data */
-#define PDC_CHASSIS_DT_ACTUAL		(3ULL << 59)	/* actual data */
-#define PDC_CHASSIS_DT_PHYS_LOC		(4ULL << 59)	/* physical location */
-#define PDC_CHASSIS_DT_PHYS_LOC_EXT	(5ULL << 59)	/* physical location extension */
-#define PDC_CHASSIS_DT_TAG		(6ULL << 59)	/* tag */
-#define PDC_CHASSIS_DT_SYNDROME		(7ULL << 59)	/* syndrome */
-#define PDC_CHASSIS_DT_CODE_ADDR	(8ULL << 59)	/* code address */
-#define PDC_CHASSIS_DT_ASCII_MSG	(9ULL << 59)	/* ascii message */
-#define PDC_CHASSIS_DT_POST		(10ULL << 59)	/* POST code */
-#define PDC_CHASSIS_DT_TIMESTAMP	(11ULL << 59)	/* timestamp */
-#define PDC_CHASSIS_DT_DEV_STAT		(12ULL << 59)	/* device status */
-#define PDC_CHASSIS_DT_DEV_TYPE		(13ULL << 59)	/* device type */
-#define PDC_CHASSIS_DT_PB_DET		(14ULL << 59)	/* problem detail */
-#define PDC_CHASSIS_DT_ACT_LEV		(15ULL << 59)	/* activity level/timeout */
-#define PDC_CHASSIS_DT_SER_NUM		(16ULL << 59)	/* serial number */
-#define PDC_CHASSIS_DT_REV_NUM		(17ULL << 59)	/* revision number */
-#define PDC_CHASSIS_DT_INTERRUPT	(18ULL << 59)	/* interruption information */
-#define PDC_CHASSIS_DT_TEST_NUM		(19ULL << 59)	/* test number */
-#define PDC_CHASSIS_DT_STATE_CHG	(20ULL << 59)	/* major changes in system state */
-#define PDC_CHASSIS_DT_PROC_DEALLOC	(21ULL << 59)	/* processor deallocate */
-#define PDC_CHASSIS_DT_RESET		(30ULL << 59)	/* reset type and cause */
-#define PDC_CHASSIS_DT_PA_LEGACY	(31ULL << 59)	/* legacy PA hex chassis code */
-
-/* System states - part of major changes in system state data field */
-#define PDC_CHASSIS_SYSTATE_BSTART	(0ULL << 0)	/* boot start */
-#define PDC_CHASSIS_SYSTATE_BCOMP	(1ULL << 0)	/* boot complete */
-#define PDC_CHASSIS_SYSTATE_CHANGE	(2ULL << 0)	/* major change */
-#define PDC_CHASSIS_SYSTATE_LED		(3ULL << 0)	/* LED change */
-#define PDC_CHASSIS_SYSTATE_PANIC	(9ULL << 0)	/* OS Panic */
-#define PDC_CHASSIS_SYSTATE_DUMP	(10ULL << 0)	/* memory dump */
-#define PDC_CHASSIS_SYSTATE_HPMC	(11ULL << 0)	/* processing HPMC */
-#define PDC_CHASSIS_SYSTATE_HALT	(15ULL << 0)	/* system halted */
-
-/* Message ID */
-#define PDC_CHASSIS_MSG_ID		(0ULL << 40)	/* we do not handle msg IDs atm */
-
-/* EOM - separates log entries */
-#define PDC_CHASSIS_EOM_CLEAR		(0ULL << 43)
-#define PDC_CHASSIS_EOM_SET		(1ULL << 43)
-
-/*
- * Preformated well known messages
- */
-
-/* Boot started */
-#define PDC_CHASSIS_PMSG_BSTART		(PDC_CHASSIS_ALERT_SERPROC	| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_INIT	| \
-					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
-					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_REID_UNSPEC	| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_BSTART	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-/* Boot complete */
-#define PDC_CHASSIS_PMSG_BCOMPLETE	(PDC_CHASSIS_ALERT_SERPROC	| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_INIT	| \
-					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
-					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_REID_UNSPEC	| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_BCOMP	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-/* Shutdown */
-#define PDC_CHASSIS_PMSG_SHUTDOWN	(PDC_CHASSIS_ALERT_SERPROC	| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_SHUT	| \
-					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
-					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_REID_UNSPEC	| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_HALT	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-/* Panic */
-#define PDC_CHASSIS_PMSG_PANIC		(PDC_CHASSIS_ALERT_SOFT_FAIL	| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_PANIC| \
-					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
-					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_REID_UNSPEC	| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_PANIC	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-// FIXME: extrapolated data
-/* HPMC */
-#define PDC_CHASSIS_PMSG_HPMC		(PDC_CHASSIS_ALERT_CONF_CHG /*?*/	| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_WARN	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_HPMC	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-/* LPMC */
-#define PDC_CHASSIS_PMSG_LPMC		(PDC_CHASSIS_ALERT_BLOCKED /*?*/| \
-					 PDC_CHASSIS_SRC_PROC		| \
-					 PDC_CHASSIS_SRC_D_PROC		| \
-					 PDC_CHASSIS_SRC_ID_UNSPEC	| \
-					 PDC_CHASSIS_PB_D_PROC_NONE	| \
-					 PDC_CHASSIS_CALL_ACT_HPUX_WARN	| \
-					 PDC_CHASSIS_ACT_STATUS_UNSPEC	| \
-					 PDC_CHASSIS_CALL_SACT_UNSPEC	| \
-					 PDC_CHASSIS_RET_HPUX		| \
-					 PDC_CHASSIS_REID_UNSPEC	| \
-					 PDC_CHASSIS_DT_STATE_CHG	| \
-					 PDC_CHASSIS_SYSTATE_CHANGE	| \
-					 PDC_CHASSIS_MSG_ID		| \
-					 PDC_CHASSIS_EOM_SET		)
-
-#endif /* _PARISC_PDC_CHASSIS_H */
-/* vim: set ts=8 */
diff --git a/include/asm-parisc/pdcpat.h b/include/asm-parisc/pdcpat.h
deleted file mode 100644
index 47539f117958..000000000000
--- a/include/asm-parisc/pdcpat.h
+++ /dev/null
@@ -1,308 +0,0 @@
-#ifndef __PARISC_PATPDC_H
-#define __PARISC_PATPDC_H
-
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright 2000 (c) Hewlett Packard (Paul Bame <bame()spam.parisc-linux.org>)
- * Copyright 2000,2004 (c) Grant Grundler <grundler()nahspam.parisc-linux.org>
- */
-
-
-#define PDC_PAT_CELL           	64L   /* Interface for gaining and 
-                                         * manipulatin g cell state within PD */
-#define PDC_PAT_CELL_GET_NUMBER    0L   /* Return Cell number */
-#define PDC_PAT_CELL_GET_INFO      1L   /* Returns info about Cell */
-#define PDC_PAT_CELL_MODULE        2L   /* Returns info about Module */
-#define PDC_PAT_CELL_SET_ATTENTION 9L   /* Set Cell Attention indicator */
-#define PDC_PAT_CELL_NUMBER_TO_LOC 10L   /* Cell Number -> Location */
-#define PDC_PAT_CELL_WALK_FABRIC   11L   /* Walk the Fabric */
-#define PDC_PAT_CELL_GET_RDT_SIZE  12L   /* Return Route Distance Table Sizes */
-#define PDC_PAT_CELL_GET_RDT       13L   /* Return Route Distance Tables */
-#define PDC_PAT_CELL_GET_LOCAL_PDH_SZ 14L /* Read Local PDH Buffer Size */
-#define PDC_PAT_CELL_SET_LOCAL_PDH    15L  /* Write Local PDH Buffer */
-#define PDC_PAT_CELL_GET_REMOTE_PDH_SZ 16L /* Return Remote PDH Buffer Size */
-#define PDC_PAT_CELL_GET_REMOTE_PDH 17L /* Read Remote PDH Buffer */
-#define PDC_PAT_CELL_GET_DBG_INFO   128L  /* Return DBG Buffer Info */
-#define PDC_PAT_CELL_CHANGE_ALIAS   129L  /* Change Non-Equivalent Alias Chacking */
-
-
-/*
-** Arg to PDC_PAT_CELL_MODULE memaddr[4]
-**
-** Addresses on the Merced Bus != all Runway Bus addresses.
-** This is intended for programming SBA/LBA chips range registers.
-*/
-#define IO_VIEW      0UL
-#define PA_VIEW      1UL
-
-/* PDC_PAT_CELL_MODULE entity type values */
-#define	PAT_ENTITY_CA	0	/* central agent */
-#define	PAT_ENTITY_PROC	1	/* processor */
-#define	PAT_ENTITY_MEM	2	/* memory controller */
-#define	PAT_ENTITY_SBA	3	/* system bus adapter */
-#define	PAT_ENTITY_LBA	4	/* local bus adapter */
-#define	PAT_ENTITY_PBC	5	/* processor bus converter */
-#define	PAT_ENTITY_XBC	6	/* crossbar fabric connect */
-#define	PAT_ENTITY_RC	7	/* fabric interconnect */
-
-/* PDC_PAT_CELL_MODULE address range type values */
-#define PAT_PBNUM           0         /* PCI Bus Number */
-#define PAT_LMMIO           1         /* < 4G MMIO Space */
-#define PAT_GMMIO           2         /* > 4G MMIO Space */
-#define PAT_NPIOP           3         /* Non Postable I/O Port Space */
-#define PAT_PIOP            4         /* Postable I/O Port Space */
-#define PAT_AHPA            5         /* Addional HPA Space */
-#define PAT_UFO             6         /* HPA Space (UFO for Mariposa) */
-#define PAT_GNIP            7         /* GNI Reserved Space */
-
-
-
-/* PDC PAT CHASSIS LOG -- Platform logging & forward progress functions */
-
-#define PDC_PAT_CHASSIS_LOG		65L
-#define PDC_PAT_CHASSIS_WRITE_LOG    	0L /* Write Log Entry */
-#define PDC_PAT_CHASSIS_READ_LOG     	1L /* Read  Log Entry */
-
-
-/* PDC PAT CPU  -- CPU configuration within the protection domain */
-
-#define PDC_PAT_CPU                	67L
-#define PDC_PAT_CPU_INFO            	0L /* Return CPU config info */
-#define PDC_PAT_CPU_DELETE          	1L /* Delete CPU */
-#define PDC_PAT_CPU_ADD             	2L /* Add    CPU */
-#define PDC_PAT_CPU_GET_NUMBER      	3L /* Return CPU Number */
-#define PDC_PAT_CPU_GET_HPA         	4L /* Return CPU HPA */
-#define PDC_PAT_CPU_STOP            	5L /* Stop   CPU */
-#define PDC_PAT_CPU_RENDEZVOUS      	6L /* Rendezvous CPU */
-#define PDC_PAT_CPU_GET_CLOCK_INFO  	7L /* Return CPU Clock info */
-#define PDC_PAT_CPU_GET_RENDEZVOUS_STATE 8L /* Return Rendezvous State */
-#define PDC_PAT_CPU_PLUNGE_FABRIC 	128L /* Plunge Fabric */
-#define PDC_PAT_CPU_UPDATE_CACHE_CLEANSING 129L /* Manipulate Cache 
-                                                 * Cleansing Mode */
-/*  PDC PAT EVENT -- Platform Events */
-
-#define PDC_PAT_EVENT              	68L
-#define PDC_PAT_EVENT_GET_CAPS     	0L /* Get Capabilities */
-#define PDC_PAT_EVENT_SET_MODE     	1L /* Set Notification Mode */
-#define PDC_PAT_EVENT_SCAN         	2L /* Scan Event */
-#define PDC_PAT_EVENT_HANDLE       	3L /* Handle Event */
-#define PDC_PAT_EVENT_GET_NB_CALL  	4L /* Get Non-Blocking call Args */
-
-/*  PDC PAT HPMC -- Cause processor to go into spin loop, and wait
- *  			for wake up from Monarch Processor.
- */
-
-#define PDC_PAT_HPMC               70L
-#define PDC_PAT_HPMC_RENDEZ_CPU     0L /* go into spin loop */
-#define PDC_PAT_HPMC_SET_PARAMS     1L /* Allows OS to specify intr which PDC 
-                                        * will use to interrupt OS during
-                                        * machine check rendezvous */
-
-/* parameters for PDC_PAT_HPMC_SET_PARAMS: */
-#define HPMC_SET_PARAMS_INTR 	    1L /* Rendezvous Interrupt */
-#define HPMC_SET_PARAMS_WAKE 	    2L /* Wake up processor */
-
-
-/*  PDC PAT IO  -- On-line services for I/O modules */
-
-#define PDC_PAT_IO                  71L
-#define PDC_PAT_IO_GET_SLOT_STATUS   	5L /* Get Slot Status Info*/
-#define PDC_PAT_IO_GET_LOC_FROM_HARDWARE 6L /* Get Physical Location from */
-                                            /* Hardware Path */
-#define PDC_PAT_IO_GET_HARDWARE_FROM_LOC 7L /* Get Hardware Path from 
-                                             * Physical Location */
-#define PDC_PAT_IO_GET_PCI_CONFIG_FROM_HW 11L /* Get PCI Configuration
-                                               * Address from Hardware Path */
-#define PDC_PAT_IO_GET_HW_FROM_PCI_CONFIG 12L /* Get Hardware Path 
-                                               * from PCI Configuration Address */
-#define PDC_PAT_IO_READ_HOST_BRIDGE_INFO 13L  /* Read Host Bridge State Info */
-#define PDC_PAT_IO_CLEAR_HOST_BRIDGE_INFO 14L /* Clear Host Bridge State Info*/
-#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE_SIZE 15L /* Get PCI INT Routing Table 
-                                                   * Size */
-#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE  16L /* Get PCI INT Routing Table */
-#define PDC_PAT_IO_GET_HINT_TABLE_SIZE 	17L /* Get Hint Table Size */
-#define PDC_PAT_IO_GET_HINT_TABLE   	18L /* Get Hint Table */
-#define PDC_PAT_IO_PCI_CONFIG_READ  	19L /* PCI Config Read */
-#define PDC_PAT_IO_PCI_CONFIG_WRITE 	20L /* PCI Config Write */
-#define PDC_PAT_IO_GET_NUM_IO_SLOTS 	21L /* Get Number of I/O Bay Slots in 
-                                       		  * Cabinet */
-#define PDC_PAT_IO_GET_LOC_IO_SLOTS 	22L /* Get Physical Location of I/O */
-                                   		     /* Bay Slots in Cabinet */
-#define PDC_PAT_IO_BAY_STATUS_INFO  	28L /* Get I/O Bay Slot Status Info */
-#define PDC_PAT_IO_GET_PROC_VIEW        29L /* Get Processor view of IO address */
-#define PDC_PAT_IO_PROG_SBA_DIR_RANGE   30L /* Program directed range */
-
-
-/* PDC PAT MEM  -- Manage memory page deallocation */
-
-#define PDC_PAT_MEM            72L
-#define PDC_PAT_MEM_PD_INFO     	0L /* Return PDT info for PD       */
-#define PDC_PAT_MEM_PD_CLEAR    	1L /* Clear PDT for PD             */
-#define PDC_PAT_MEM_PD_READ     	2L /* Read PDT entries for PD      */
-#define PDC_PAT_MEM_PD_RESET    	3L /* Reset clear bit for PD       */
-#define PDC_PAT_MEM_CELL_INFO   	5L /* Return PDT info For Cell     */
-#define PDC_PAT_MEM_CELL_CLEAR  	6L /* Clear PDT For Cell           */
-#define PDC_PAT_MEM_CELL_READ   	7L /* Read PDT entries For Cell    */
-#define PDC_PAT_MEM_CELL_RESET  	8L /* Reset clear bit For Cell     */
-#define PDC_PAT_MEM_SETGM	  	9L /* Set Golden Memory value      */
-#define PDC_PAT_MEM_ADD_PAGE    	10L /* ADDs a page to the cell      */
-#define PDC_PAT_MEM_ADDRESS     	11L /* Get Physical Location From   */
-                                    		 /* Memory Address               */
-#define PDC_PAT_MEM_GET_TXT_SIZE   	12L /* Get Formatted Text Size   */
-#define PDC_PAT_MEM_GET_PD_TXT     	13L /* Get PD Formatted Text     */
-#define PDC_PAT_MEM_GET_CELL_TXT   	14L /* Get Cell Formatted Text   */
-#define PDC_PAT_MEM_RD_STATE_INFO  	15L /* Read Mem Module State Info*/
-#define PDC_PAT_MEM_CLR_STATE_INFO 	16L /*Clear Mem Module State Info*/
-#define PDC_PAT_MEM_CLEAN_RANGE    	128L /*Clean Mem in specific range*/
-#define PDC_PAT_MEM_GET_TBL_SIZE   	131L /* Get Memory Table Size     */
-#define PDC_PAT_MEM_GET_TBL        	132L /* Get Memory Table          */
-
-
-/* PDC PAT NVOLATILE  --  Access Non-Volatile Memory */
-
-#define PDC_PAT_NVOLATILE	73L
-#define PDC_PAT_NVOLATILE_READ		0L /* Read Non-Volatile Memory   */
-#define PDC_PAT_NVOLATILE_WRITE		1L /* Write Non-Volatile Memory  */
-#define PDC_PAT_NVOLATILE_GET_SIZE	2L /* Return size of NVM         */
-#define PDC_PAT_NVOLATILE_VERIFY	3L /* Verify contents of NVM     */
-#define PDC_PAT_NVOLATILE_INIT		4L /* Initialize NVM             */
-
-/* PDC PAT PD */
-#define PDC_PAT_PD		74L         /* Protection Domain Info   */
-#define PDC_PAT_PD_GET_ADDR_MAP		0L  /* Get Address Map          */
-
-/* PDC_PAT_PD_GET_ADDR_MAP entry types */
-#define PAT_MEMORY_DESCRIPTOR		1   
-
-/* PDC_PAT_PD_GET_ADDR_MAP memory types */
-#define PAT_MEMTYPE_MEMORY		0
-#define PAT_MEMTYPE_FIRMWARE		4
-
-/* PDC_PAT_PD_GET_ADDR_MAP memory usage */
-#define PAT_MEMUSE_GENERAL		0
-#define PAT_MEMUSE_GI			128
-#define PAT_MEMUSE_GNI			129
-
-
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-
-#ifdef CONFIG_64BIT
-#define is_pdc_pat()	(PDC_TYPE_PAT == pdc_type)
-extern int pdc_pat_get_irt_size(unsigned long *num_entries, unsigned long cell_num);
-extern int pdc_pat_get_irt(void *r_addr, unsigned long cell_num);
-#else	/* ! CONFIG_64BIT */
-/* No PAT support for 32-bit kernels...sorry */
-#define is_pdc_pat()	(0)
-#define pdc_pat_get_irt_size(num_entries, cell_numn)	PDC_BAD_PROC
-#define pdc_pat_get_irt(r_addr, cell_num)		PDC_BAD_PROC
-#endif	/* ! CONFIG_64BIT */
-
-
-struct pdc_pat_cell_num {
-	unsigned long cell_num;
-	unsigned long cell_loc;
-};
-
-struct pdc_pat_cpu_num {
-	unsigned long cpu_num;
-	unsigned long cpu_loc;
-};
-
-struct pdc_pat_pd_addr_map_entry {
-	unsigned char entry_type;       /* 1 = Memory Descriptor Entry Type */
-	unsigned char reserve1[5];
-	unsigned char memory_type;
-	unsigned char memory_usage;
-	unsigned long paddr;
-	unsigned int  pages;            /* Length in 4K pages */
-	unsigned int  reserve2;
-	unsigned long cell_map;
-};
-
-/********************************************************************
-* PDC_PAT_CELL[Return Cell Module] memaddr[0] conf_base_addr
-* ----------------------------------------------------------
-* Bit  0 to 51 - conf_base_addr
-* Bit 52 to 62 - reserved
-* Bit       63 - endianess bit
-********************************************************************/
-#define PAT_GET_CBA(value) ((value) & 0xfffffffffffff000UL)
-
-/********************************************************************
-* PDC_PAT_CELL[Return Cell Module] memaddr[1] mod_info
-* ----------------------------------------------------
-* Bit  0 to  7 - entity type
-*    0 = central agent,            1 = processor,
-*    2 = memory controller,        3 = system bus adapter,
-*    4 = local bus adapter,        5 = processor bus converter,
-*    6 = crossbar fabric connect,  7 = fabric interconnect,
-*    8 to 254 reserved,            255 = unknown.
-* Bit  8 to 15 - DVI
-* Bit 16 to 23 - IOC functions
-* Bit 24 to 39 - reserved
-* Bit 40 to 63 - mod_pages
-*    number of 4K pages a module occupies starting at conf_base_addr
-********************************************************************/
-#define PAT_GET_ENTITY(value)	(((value) >> 56) & 0xffUL)
-#define PAT_GET_DVI(value)	(((value) >> 48) & 0xffUL)
-#define PAT_GET_IOC(value)	(((value) >> 40) & 0xffUL)
-#define PAT_GET_MOD_PAGES(value) ((value) & 0xffffffUL)
-
-
-/*
-** PDC_PAT_CELL_GET_INFO return block
-*/
-typedef struct pdc_pat_cell_info_rtn_block {
-	unsigned long cpu_info;
-	unsigned long cell_info;
-	unsigned long cell_location;
-	unsigned long reo_location;
-	unsigned long mem_size;
-	unsigned long dimm_status;
-	unsigned long pdc_rev;
-	unsigned long fabric_info0;
-	unsigned long fabric_info1;
-	unsigned long fabric_info2;
-	unsigned long fabric_info3;
-	unsigned long reserved[21];
-} pdc_pat_cell_info_rtn_block_t;
-
-
-/* FIXME: mod[508] should really be a union of the various mod components */
-struct pdc_pat_cell_mod_maddr_block {	/* PDC_PAT_CELL_MODULE */
-	unsigned long cba;		/* func 0 cfg space address */
-	unsigned long mod_info;		/* module information */
-	unsigned long mod_location;	/* physical location of the module */
-	struct hardware_path mod_path;	/* module path (device path - layers) */
-	unsigned long mod[508];		/* PAT cell module components */
-} __attribute__((aligned(8))) ;
-
-typedef struct pdc_pat_cell_mod_maddr_block pdc_pat_cell_mod_maddr_block_t;
-
-
-extern int pdc_pat_chassis_send_log(unsigned long status, unsigned long data);
-extern int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info);
-extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long mod, unsigned long view_type, void *mem_addr);
-extern int pdc_pat_cell_num_to_loc(void *, unsigned long);
-
-extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa);
-
-extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset);
-
-
-extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val); 
-extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val); 
-
-
-/* Flag to indicate this is a PAT box...don't use this unless you
-** really have to...it might go away some day.
-*/
-extern int pdc_pat;     /* arch/parisc/kernel/inventory.c */
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* ! __PARISC_PATPDC_H */
diff --git a/include/asm-parisc/percpu.h b/include/asm-parisc/percpu.h
deleted file mode 100644
index a0dcd1970128..000000000000
--- a/include/asm-parisc/percpu.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _PARISC_PERCPU_H
-#define _PARISC_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif 
-
diff --git a/include/asm-parisc/perf.h b/include/asm-parisc/perf.h
deleted file mode 100644
index a18e11972c09..000000000000
--- a/include/asm-parisc/perf.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef _ASM_PERF_H_
-#define _ASM_PERF_H_
-
-/* ioctls */
-#define PA_PERF_ON	_IO('p', 1)
-#define PA_PERF_OFF	_IOR('p', 2, unsigned int)
-#define PA_PERF_VERSION	_IOR('p', 3, int)
-
-#define PA_PERF_DEV	"perf"
-#define PA_PERF_MINOR	146
-
-/* Interface types */
-#define UNKNOWN_INTF    255
-#define ONYX_INTF         0
-#define CUDA_INTF         1
-
-/* Common Onyx and Cuda images */
-#define CPI                 0
-#define BUSUTIL             1
-#define TLBMISS             2
-#define TLBHANDMISS         3
-#define PTKN                4
-#define PNTKN               5
-#define IMISS               6
-#define DMISS               7
-#define DMISS_ACCESS        8 
-#define BIG_CPI 	    9
-#define BIG_LS		   10  
-#define BR_ABORT	   11
-#define ISNT		   12 
-#define QUADRANT           13
-#define RW_PDFET           14
-#define RW_WDFET           15
-#define SHLIB_CPI          16
-
-/* Cuda only Images */
-#define FLOPS              17
-#define CACHEMISS          18 
-#define BRANCHES           19             
-#define CRSTACK            20 
-#define I_CACHE_SPEC       21 
-#define MAX_CUDA_IMAGES    22 
-
-/* Onyx only Images */
-#define ADDR_INV_ABORT_ALU 17
-#define BRAD_STALL	   18 
-#define CNTL_IN_PIPEL	   19 
-#define DSNT_XFH	   20 
-#define FET_SIG1	   21 
-#define FET_SIG2	   22 
-#define G7_1		   23 
-#define G7_2		   24 
-#define G7_3 		   25
-#define G7_4		   26
-#define MPB_LABORT         27
-#define PANIC              28
-#define RARE_INST          29 
-#define RW_DFET            30 
-#define RW_IFET            31 
-#define RW_SDFET           32 
-#define SPEC_IFET          33 
-#define ST_COND0           34 
-#define ST_COND1           35 
-#define ST_COND2           36
-#define ST_COND3           37
-#define ST_COND4           38
-#define ST_UNPRED0         39 
-#define ST_UNPRED1         40 
-#define UNPRED             41 
-#define GO_STORE           42
-#define SHLIB_CALL         43
-#define MAX_ONYX_IMAGES    44
-
-#endif
diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h
deleted file mode 100644
index fc987a1c12a8..000000000000
--- a/include/asm-parisc/pgalloc.h
+++ /dev/null
@@ -1,149 +0,0 @@
-#ifndef _ASM_PGALLOC_H
-#define _ASM_PGALLOC_H
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-
-#include <asm/cache.h>
-
-/* Allocate the top level pgd (page directory)
- *
- * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
- * allocate the first pmd adjacent to the pgd.  This means that we can
- * subtract a constant offset to get to it.  The pmd and pgd sizes are
- * arranged so that a single pmd covers 4GB (giving a full 64-bit
- * process access to 8TB) so our lookups are effectively L2 for the
- * first 4GB of the kernel (i.e. for all ILP32 processes and all the
- * kernel for machines with under 4GB of memory) */
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL,
-					       PGD_ALLOC_ORDER);
-	pgd_t *actual_pgd = pgd;
-
-	if (likely(pgd != NULL)) {
-		memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#ifdef CONFIG_64BIT
-		actual_pgd += PTRS_PER_PGD;
-		/* Populate first pmd with allocated memory.  We mark it
-		 * with PxD_FLAG_ATTACHED as a signal to the system that this
-		 * pmd entry may not be cleared. */
-		__pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | 
-				        PxD_FLAG_VALID | 
-					PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
-		/* The first pmd entry also is marked with _PAGE_GATEWAY as
-		 * a signal that this pmd may not be freed */
-		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
-#endif
-	}
-	return actual_pgd;
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifdef CONFIG_64BIT
-	pgd -= PTRS_PER_PGD;
-#endif
-	free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
-}
-
-#if PT_NLEVELS == 3
-
-/* Three Level Page Table Support for pmd's */
-
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
-{
-	__pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
-		        (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
-}
-
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
-					       PMD_ORDER);
-	if (pmd)
-		memset(pmd, 0, PAGE_SIZE<<PMD_ORDER);
-	return pmd;
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-#ifdef CONFIG_64BIT
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pgd;
-		 * cannot free it */
-		return;
-#endif
-	free_pages((unsigned long)pmd, PMD_ORDER);
-}
-
-#else
-
-/* Two Level Page Table Support for pmd's */
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- */
-
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
-
-#endif
-
-static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
-{
-#ifdef CONFIG_64BIT
-	/* preserve the gateway marker if this is the beginning of
-	 * the permanent pmd */
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
-				 PxD_FLAG_VALID |
-				 PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
-	else
-#endif
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
-}
-
-#define pmd_populate(mm, pmd, pte_page) \
-	pmd_populate_kernel(mm, pmd, page_address(pte_page))
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-	if (page)
-		pgtable_page_ctor(page);
-	return page;
-}
-
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
-{
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-	return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	pte_free_kernel(mm, page_address(pte));
-}
-
-#define check_pgt_cache()	do { } while (0)
-
-#endif
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
deleted file mode 100644
index 470a4b88124d..000000000000
--- a/include/asm-parisc/pgtable.h
+++ /dev/null
@@ -1,508 +0,0 @@
-#ifndef _PARISC_PGTABLE_H
-#define _PARISC_PGTABLE_H
-
-#include <asm-generic/4level-fixup.h>
-
-#include <asm/fixmap.h>
-
-#ifndef __ASSEMBLY__
-/*
- * we simulate an x86-style page table for the linux mm code
- */
-
-#include <linux/mm.h>		/* for vm_area_struct */
-#include <linux/bitops.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-
-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory.  For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error.  Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for parisc.
- */
-#define kern_addr_valid(addr)	(1)
-
-/* Certain architectures need to do special things when PTEs
- * within a page table are directly modified.  Thus, the following
- * hook is made available.
- */
-#define set_pte(pteptr, pteval)                                 \
-        do{                                                     \
-                *(pteptr) = (pteval);                           \
-        } while(0)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-
-#endif /* !__ASSEMBLY__ */
-
-#define pte_ERROR(e) \
-	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
-#define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
-
-/* This is the size of the initially mapped kernel memory */
-#ifdef CONFIG_64BIT
-#define KERNEL_INITIAL_ORDER	24	/* 0 to 1<<24 = 16MB */
-#else
-#define KERNEL_INITIAL_ORDER	23	/* 0 to 1<<23 = 8MB */
-#endif
-#define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
-
-#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-#define PT_NLEVELS	3
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PMD_ORDER	1 /* Number of pages per pmd */
-#define PGD_ALLOC_ORDER	2 /* first pgd contains pmd */
-#else
-#define PT_NLEVELS	2
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PGD_ALLOC_ORDER	PGD_ORDER
-#endif
-
-/* Definitions for 3rd level (we use PLD here for Page Lower directory
- * because PTE_SHIFT is used lower down to mean shift that has to be
- * done to get usable bits out of the PTE) */
-#define PLD_SHIFT	PAGE_SHIFT
-#define PLD_SIZE	PAGE_SIZE
-#define BITS_PER_PTE	(PAGE_SHIFT - BITS_PER_PTE_ENTRY)
-#define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
-
-/* Definitions for 2nd level */
-#define pgtable_cache_init()	do { } while (0)
-
-#define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-#if PT_NLEVELS == 3
-#define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
-#else
-#define BITS_PER_PMD	0
-#endif
-#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
-
-/* Definitions for 1st level */
-#define PGDIR_SHIFT	(PMD_SHIFT + BITS_PER_PMD)
-#define BITS_PER_PGD	(PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD    (1UL << BITS_PER_PGD)
-#define USER_PTRS_PER_PGD       PTRS_PER_PGD
-
-#define MAX_ADDRBITS	(PGDIR_SHIFT + BITS_PER_PGD)
-#define MAX_ADDRESS	(1UL << MAX_ADDRBITS)
-
-#define SPACEID_SHIFT	(MAX_ADDRBITS - 32)
-
-/* This calculates the number of initial pages we need for the initial
- * page tables */
-#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT)
-# define PT_INITIAL	(1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT))
-#else
-# define PT_INITIAL	(1)  /* all initial PTEs fit into one page */
-#endif
-
-/*
- * pgd entries used up by user/kernel:
- */
-
-#define FIRST_USER_ADDRESS	0
-
-/* NB: The tlb miss handlers make certain assumptions about the order */
-/*     of the following bits, so be careful (One example, bits 25-31  */
-/*     are moved together in one instruction).                        */
-
-#define _PAGE_READ_BIT     31   /* (0x001) read access allowed */
-#define _PAGE_WRITE_BIT    30   /* (0x002) write access allowed */
-#define _PAGE_EXEC_BIT     29   /* (0x004) execute access allowed */
-#define _PAGE_GATEWAY_BIT  28   /* (0x008) privilege promotion allowed */
-#define _PAGE_DMB_BIT      27   /* (0x010) Data Memory Break enable (B bit) */
-#define _PAGE_DIRTY_BIT    26   /* (0x020) Page Dirty (D bit) */
-#define _PAGE_FILE_BIT	_PAGE_DIRTY_BIT	/* overload this bit */
-#define _PAGE_REFTRAP_BIT  25   /* (0x040) Page Ref. Trap enable (T bit) */
-#define _PAGE_NO_CACHE_BIT 24   /* (0x080) Uncached Page (U bit) */
-#define _PAGE_ACCESSED_BIT 23   /* (0x100) Software: Page Accessed */
-#define _PAGE_PRESENT_BIT  22   /* (0x200) Software: translation valid */
-#define _PAGE_FLUSH_BIT    21   /* (0x400) Software: translation valid */
-				/*             for cache flushing only */
-#define _PAGE_USER_BIT     20   /* (0x800) Software: User accessible page */
-
-/* N.B. The bits are defined in terms of a 32 bit word above, so the */
-/*      following macro is ok for both 32 and 64 bit.                */
-
-#define xlate_pabit(x) (31 - x)
-
-/* this defines the shift to the usable bits in the PTE it is set so
- * that the valid bits _PAGE_PRESENT_BIT and _PAGE_USER_BIT are set
- * to zero */
-#define PTE_SHIFT	   	xlate_pabit(_PAGE_USER_BIT)
-
-/* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */
-#define PFN_PTE_SHIFT		12
-
-
-/* this is how many bits may be used by the file functions */
-#define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_SHIFT)
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE })
-
-#define _PAGE_READ     (1 << xlate_pabit(_PAGE_READ_BIT))
-#define _PAGE_WRITE    (1 << xlate_pabit(_PAGE_WRITE_BIT))
-#define _PAGE_RW       (_PAGE_READ | _PAGE_WRITE)
-#define _PAGE_EXEC     (1 << xlate_pabit(_PAGE_EXEC_BIT))
-#define _PAGE_GATEWAY  (1 << xlate_pabit(_PAGE_GATEWAY_BIT))
-#define _PAGE_DMB      (1 << xlate_pabit(_PAGE_DMB_BIT))
-#define _PAGE_DIRTY    (1 << xlate_pabit(_PAGE_DIRTY_BIT))
-#define _PAGE_REFTRAP  (1 << xlate_pabit(_PAGE_REFTRAP_BIT))
-#define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT))
-#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
-#define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
-#define _PAGE_FLUSH    (1 << xlate_pabit(_PAGE_FLUSH_BIT))
-#define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
-#define _PAGE_FILE     (1 << xlate_pabit(_PAGE_FILE_BIT))
-
-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_KERNEL	(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
-
-/* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
- * are page-aligned, we don't care about the PAGE_OFFSET bits, except
- * for a few meta-information bits, so we shift the address to be
- * able to effectively address 40/42/44-bits of physical address space
- * depending on 4k/16k/64k PAGE_SIZE */
-#define _PxD_PRESENT_BIT   31
-#define _PxD_ATTACHED_BIT  30
-#define _PxD_VALID_BIT     29
-
-#define PxD_FLAG_PRESENT  (1 << xlate_pabit(_PxD_PRESENT_BIT))
-#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT))
-#define PxD_FLAG_VALID    (1 << xlate_pabit(_PxD_VALID_BIT))
-#define PxD_FLAG_MASK     (0xf)
-#define PxD_FLAG_SHIFT    (4)
-#define PxD_VALUE_SHIFT   (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */
-
-#ifndef __ASSEMBLY__
-
-#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED)
-/* Others seem to make this executable, I don't know if that's correct
-   or not.  The stack is mapped this way though so this is necessary
-   in the short term - dhd@linuxcare.com, 2000-08-08 */
-#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
-#define PAGE_WRITEONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITE | _PAGE_ACCESSED)
-#define PAGE_EXECREAD   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED)
-#define PAGE_COPY       PAGE_EXECREAD
-#define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
-#define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
-#define PAGE_KERNEL_UNC	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
-#define PAGE_GATEWAY    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
-#define PAGE_FLUSH      __pgprot(_PAGE_FLUSH)
-
-
-/*
- * We could have an execute only page using "gateway - promote to priv
- * level 3", but that is kind of silly. So, the way things are defined
- * now, we must always have read permission for pages with execute
- * permission. For the fun of it we'll go ahead and support write only
- * pages.
- */
-
-	 /*xwr*/
-#define __P000  PAGE_NONE
-#define __P001  PAGE_READONLY
-#define __P010  __P000 /* copy on write */
-#define __P011  __P001 /* copy on write */
-#define __P100  PAGE_EXECREAD
-#define __P101  PAGE_EXECREAD
-#define __P110  __P100 /* copy on write */
-#define __P111  __P101 /* copy on write */
-
-#define __S000  PAGE_NONE
-#define __S001  PAGE_READONLY
-#define __S010  PAGE_WRITEONLY
-#define __S011  PAGE_SHARED
-#define __S100  PAGE_EXECREAD
-#define __S101  PAGE_EXECREAD
-#define __S110  PAGE_RWX
-#define __S111  PAGE_RWX
-
-
-extern pgd_t swapper_pg_dir[]; /* declared in init_task.c */
-
-/* initial page tables for 0-8MB for kernel */
-
-extern pte_t pg0[];
-
-/* zero page used for uninitialized stuff */
-
-extern unsigned long *empty_zero_page;
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
-#define pte_none(x)     ((pte_val(x) == 0) || (pte_val(x) & _PAGE_FLUSH))
-#define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
-
-#define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
-#define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
-#define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
-#define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
-
-#if PT_NLEVELS == 3
-/* The first entry of the permanent pmd is not there if it contains
- * the gateway marker */
-#define pmd_none(x)	(!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
-#else
-#define pmd_none(x)	(!pmd_val(x))
-#endif
-#define pmd_bad(x)	(!(pmd_flag(x) & PxD_FLAG_VALID))
-#define pmd_present(x)	(pmd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pmd_clear(pmd_t *pmd) {
-#if PT_NLEVELS == 3
-	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		/* This is the entry pointing to the permanent pmd
-		 * attached to the pgd; cannot clear it */
-		__pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
-	else
-#endif
-		__pmd_val_set(*pmd,  0);
-}
-
-
-
-#if PT_NLEVELS == 3
-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
-#define pgd_page(pgd)	virt_to_page((void *)pgd_page_vaddr(pgd))
-
-/* For 64 bit we have three level tables */
-
-#define pgd_none(x)     (!pgd_val(x))
-#define pgd_bad(x)      (!(pgd_flag(x) & PxD_FLAG_VALID))
-#define pgd_present(x)  (pgd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pgd_clear(pgd_t *pgd) {
-#if PT_NLEVELS == 3
-	if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pgd; cannot
-		 * free it */
-		return;
-#endif
-	__pgd_val_set(*pgd, 0);
-}
-#else
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-static inline void pgd_clear(pgd_t * pgdp)	{ }
-#endif
-
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_special(pte_t pte)	{ return 0; }
-
-static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_PAGE_WRITE; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
-static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
-
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define __mk_pte(addr,pgprot) \
-({									\
-	pte_t __pte;							\
-									\
-	pte_val(__pte) = ((((addr)>>PAGE_SHIFT)<<PFN_PTE_SHIFT) + pgprot_val(pgprot));	\
-									\
-	__pte;								\
-})
-
-#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
-
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
-{
-	pte_t pte;
-	pte_val(pte) = (pfn << PFN_PTE_SHIFT) | pgprot_val(pgprot);
-	return pte;
-}
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
-
-/* Permanent address of a page.  On parisc we don't have highmem. */
-
-#define pte_pfn(x)		(pte_val(x) >> PFN_PTE_SHIFT)
-
-#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
-
-#define pmd_page_vaddr(pmd)	((unsigned long) __va(pmd_address(pmd)))
-
-#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd)))
-#define pmd_page(pmd)	virt_to_page((void *)__pmd_page(pmd))
-
-#define pgd_index(address) ((address) >> PGDIR_SHIFT)
-
-/* to find an entry in a page-table-directory */
-#define pgd_offset(mm, address) \
-((mm)->pgd + ((address) >> PGDIR_SHIFT))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
-/* Find an entry in the second-level page table.. */
-
-#if PT_NLEVELS == 3
-#define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
-#else
-#define pmd_offset(dir,addr) ((pmd_t *) dir)
-#endif
-
-/* Find an entry in the third-level page table.. */ 
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
-#define pte_offset_kernel(pmd, address) \
-	((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
-
-#define pte_unmap(pte)			do { } while (0)
-#define pte_unmap_nested(pte)		do { } while (0)
-
-extern void paging_init (void);
-
-/* Used for deferring calls to flush_dcache_page() */
-
-#define PG_dcache_dirty         PG_arch_1
-
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
-
-/* Encode and de-code a swap entry */
-
-#define __swp_type(x)                     ((x).val & 0x1f)
-#define __swp_offset(x)                   ( (((x).val >> 6) &  0x7) | \
-					  (((x).val >> 8) & ~0x7) )
-#define __swp_entry(type, offset)         ((swp_entry_t) { (type) | \
-					    ((offset &  0x7) << 6) | \
-					    ((offset & ~0x7) << 8) })
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
-
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-#ifdef CONFIG_SMP
-	if (!pte_young(*ptep))
-		return 0;
-	return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), &pte_val(*ptep));
-#else
-	pte_t pte = *ptep;
-	if (!pte_young(pte))
-		return 0;
-	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
-	return 1;
-#endif
-}
-
-extern spinlock_t pa_dbit_lock;
-
-struct mm_struct;
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	pte_t old_pte;
-	pte_t pte;
-
-	spin_lock(&pa_dbit_lock);
-	pte = old_pte = *ptep;
-	pte_val(pte) &= ~_PAGE_PRESENT;
-	pte_val(pte) |= _PAGE_FLUSH;
-	set_pte_at(mm,addr,ptep,pte);
-	spin_unlock(&pa_dbit_lock);
-
-	return old_pte;
-}
-
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-#ifdef CONFIG_SMP
-	unsigned long new, old;
-
-	do {
-		old = pte_val(*ptep);
-		new = pte_val(pte_wrprotect(__pte (old)));
-	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
-#else
-	pte_t old_pte = *ptep;
-	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
-#endif
-}
-
-#define pte_same(A,B)	(pte_val(A) == pte_val(B))
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* TLB page size encoding - see table 3-1 in parisc20.pdf */
-#define _PAGE_SIZE_ENCODING_4K		0
-#define _PAGE_SIZE_ENCODING_16K		1
-#define _PAGE_SIZE_ENCODING_64K		2
-#define _PAGE_SIZE_ENCODING_256K	3
-#define _PAGE_SIZE_ENCODING_1M		4
-#define _PAGE_SIZE_ENCODING_4M		5
-#define _PAGE_SIZE_ENCODING_16M		6
-#define _PAGE_SIZE_ENCODING_64M		7
-
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4K
-#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
-# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16K
-#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
-# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_64K
-#endif
-
-
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
-		remap_pfn_range(vma, vaddr, pfn, size, prot)
-
-#define pgprot_noncached(prot) __pgprot(pgprot_val(prot) | _PAGE_NO_CACHE)
-
-/* We provide our own get_unmapped_area to provide cache coherency */
-
-#define HAVE_ARCH_UNMAPPED_AREA
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTE_SAME
-#include <asm-generic/pgtable.h>
-
-#endif /* _PARISC_PGTABLE_H */
diff --git a/include/asm-parisc/poll.h b/include/asm-parisc/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/include/asm-parisc/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/include/asm-parisc/posix_types.h b/include/asm-parisc/posix_types.h
deleted file mode 100644
index bb725a6630bb..000000000000
--- a/include/asm-parisc/posix_types.h
+++ /dev/null
@@ -1,129 +0,0 @@
-#ifndef __ARCH_PARISC_POSIX_TYPES_H
-#define __ARCH_PARISC_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc.  Also, we cannot
- * assume GCC is being used.
- */
-typedef unsigned long		__kernel_ino_t;
-typedef unsigned short		__kernel_mode_t;
-typedef unsigned short		__kernel_nlink_t;
-typedef long			__kernel_off_t;
-typedef int			__kernel_pid_t;
-typedef unsigned short		__kernel_ipc_pid_t;
-typedef unsigned int		__kernel_uid_t;
-typedef unsigned int		__kernel_gid_t;
-typedef int			__kernel_suseconds_t;
-typedef long			__kernel_clock_t;
-typedef int			__kernel_timer_t;
-typedef int			__kernel_clockid_t;
-typedef int			__kernel_daddr_t;
-/* Note these change from narrow to wide kernels */
-#ifdef CONFIG_64BIT
-typedef unsigned long		__kernel_size_t;
-typedef long			__kernel_ssize_t;
-typedef long			__kernel_ptrdiff_t;
-typedef long			__kernel_time_t;
-#else
-typedef unsigned int		__kernel_size_t;
-typedef int			__kernel_ssize_t;
-typedef int			__kernel_ptrdiff_t;
-typedef long			__kernel_time_t;
-#endif
-typedef char *			__kernel_caddr_t;
-
-typedef unsigned short		__kernel_uid16_t;
-typedef unsigned short		__kernel_gid16_t;
-typedef unsigned int		__kernel_uid32_t;
-typedef unsigned int		__kernel_gid32_t;
-
-#ifdef __GNUC__
-typedef long long		__kernel_loff_t;
-typedef long long		__kernel_off64_t;
-typedef unsigned long long	__kernel_ino64_t;
-#endif
-
-typedef unsigned int		__kernel_old_dev_t;
-
-typedef struct {
-	int	val[2];
-} __kernel_fsid_t;
-
-/* compatibility stuff */
-typedef __kernel_uid_t __kernel_old_uid_t;
-typedef __kernel_gid_t __kernel_old_gid_t;
-
-#if defined(__KERNEL__)
-
-#undef __FD_SET
-static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
-{
-	unsigned long __tmp = __fd / __NFDBITS;
-	unsigned long __rem = __fd % __NFDBITS;
-	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
-}
-
-#undef __FD_CLR
-static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
-{
-	unsigned long __tmp = __fd / __NFDBITS;
-	unsigned long __rem = __fd % __NFDBITS;
-	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
-}
-
-#undef __FD_ISSET
-static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
-{ 
-	unsigned long __tmp = __fd / __NFDBITS;
-	unsigned long __rem = __fd % __NFDBITS;
-	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant case (8 ints,
- * for a 256-bit fd_set)
- */
-#undef __FD_ZERO
-static __inline__ void __FD_ZERO(__kernel_fd_set *__p)
-{
-	unsigned long *__tmp = __p->fds_bits;
-	int __i;
-
-	if (__builtin_constant_p(__FDSET_LONGS)) {
-		switch (__FDSET_LONGS) {
-		case 16:
-			__tmp[ 0] = 0; __tmp[ 1] = 0;
-			__tmp[ 2] = 0; __tmp[ 3] = 0;
-			__tmp[ 4] = 0; __tmp[ 5] = 0;
-			__tmp[ 6] = 0; __tmp[ 7] = 0;
-			__tmp[ 8] = 0; __tmp[ 9] = 0;
-			__tmp[10] = 0; __tmp[11] = 0;
-			__tmp[12] = 0; __tmp[13] = 0;
-			__tmp[14] = 0; __tmp[15] = 0;
-			return;
-
-		case 8:
-			__tmp[ 0] = 0; __tmp[ 1] = 0;
-			__tmp[ 2] = 0; __tmp[ 3] = 0;
-			__tmp[ 4] = 0; __tmp[ 5] = 0;
-			__tmp[ 6] = 0; __tmp[ 7] = 0;
-			return;
-
-		case 4:
-			__tmp[ 0] = 0; __tmp[ 1] = 0;
-			__tmp[ 2] = 0; __tmp[ 3] = 0;
-			return;
-		}
-	}
-	__i = __FDSET_LONGS;
-	while (__i) {
-		__i--;
-		*__tmp = 0;
-		__tmp++;
-	}
-}
-
-#endif /* defined(__KERNEL__) */
-
-#endif
diff --git a/include/asm-parisc/prefetch.h b/include/asm-parisc/prefetch.h
deleted file mode 100644
index c5edc60c059f..000000000000
--- a/include/asm-parisc/prefetch.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * include/asm-parisc/prefetch.h
- *
- * PA 2.0 defines data prefetch instructions on page 6-11 of the Kane book.
- * In addition, many implementations do hardware prefetching of both
- * instructions and data.
- *
- * PA7300LC (page 14-4 of the ERS) also implements prefetching by a load
- * to gr0 but not in a way that Linux can use.  If the load would cause an
- * interruption (eg due to prefetching 0), it is suppressed on PA2.0
- * processors, but not on 7300LC.
- *
- */
-
-#ifndef __ASM_PARISC_PREFETCH_H
-#define __ASM_PARISC_PREFETCH_H
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_PREFETCH
-
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(const void *addr)
-{
-	__asm__("ldw 0(%0), %%r0" : : "r" (addr));
-}
-
-/* LDD is a PA2.0 addition. */
-#ifdef CONFIG_PA20
-#define ARCH_HAS_PREFETCHW
-static inline void prefetchw(const void *addr)
-{
-	__asm__("ldd 0(%0), %%r0" : : "r" (addr));
-}
-#endif /* CONFIG_PA20 */
-
-#endif /* CONFIG_PREFETCH */
-#endif /* __ASSEMBLY__ */
-
-#endif /* __ASM_PARISC_PROCESSOR_H */
diff --git a/include/asm-parisc/processor.h b/include/asm-parisc/processor.h
deleted file mode 100644
index 3c9d34844c83..000000000000
--- a/include/asm-parisc/processor.h
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * include/asm-parisc/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2001 Grant Grundler
- */
-
-#ifndef __ASM_PARISC_PROCESSOR_H
-#define __ASM_PARISC_PROCESSOR_H
-
-#ifndef __ASSEMBLY__
-#include <linux/threads.h>
-
-#include <asm/prefetch.h>
-#include <asm/hardware.h>
-#include <asm/pdc.h>
-#include <asm/ptrace.h>
-#include <asm/types.h>
-#include <asm/system.h>
-#endif /* __ASSEMBLY__ */
-
-#define KERNEL_STACK_SIZE 	(4*PAGE_SIZE)
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#ifdef CONFIG_PA20
-#define current_ia(x)	__asm__("mfia %0" : "=r"(x))
-#else /* mfia added in pa2.0 */
-#define current_ia(x)	__asm__("blr 0,%0\n\tnop" : "=r"(x))
-#endif
-#define current_text_addr() ({ void *pc; current_ia(pc); pc; })
-
-#define TASK_SIZE_OF(tsk)       ((tsk)->thread.task_size)
-#define TASK_SIZE	        TASK_SIZE_OF(current)
-#define TASK_UNMAPPED_BASE      (current->thread.map_base)
-
-#define DEFAULT_TASK_SIZE32	(0xFFF00000UL)
-#define DEFAULT_MAP_BASE32	(0x40000000UL)
-
-#ifdef CONFIG_64BIT
-#define DEFAULT_TASK_SIZE       (MAX_ADDRESS-0xf000000)
-#define DEFAULT_MAP_BASE        (0x200000000UL)
-#else
-#define DEFAULT_TASK_SIZE	DEFAULT_TASK_SIZE32
-#define DEFAULT_MAP_BASE	DEFAULT_MAP_BASE32
-#endif
-
-#ifdef __KERNEL__
-
-/* XXX: STACK_TOP actually should be STACK_BOTTOM for parisc.
- * prumpf */
-
-#define STACK_TOP	TASK_SIZE
-#define STACK_TOP_MAX	DEFAULT_TASK_SIZE
-
-#endif
-
-#ifndef __ASSEMBLY__
-
-/*
- * Data detected about CPUs at boot time which is the same for all CPU's.
- * HP boxes are SMP - ie identical processors.
- *
- * FIXME: some CPU rev info may be processor specific...
- */
-struct system_cpuinfo_parisc {
-	unsigned int	cpu_count;
-	unsigned int	cpu_hz;
-	unsigned int	hversion;
-	unsigned int	sversion;
-	enum cpu_type	cpu_type;
-
-	struct {
-		struct pdc_model model;
-		unsigned long versions;
-		unsigned long cpuid;
-		unsigned long capabilities;
-		char   sys_model_name[81]; /* PDC-ROM returnes this model name */
-	} pdc;
-
-	const char	*cpu_name;	/* e.g. "PA7300LC (PCX-L2)" */
-	const char	*family_name;	/* e.g. "1.1e" */
-};
-
-
-/* Per CPU data structure - ie varies per CPU.  */
-struct cpuinfo_parisc {
-	unsigned long it_value;     /* Interval Timer at last timer Intr */
-	unsigned long it_delta;     /* Interval delta (tic_10ms / HZ * 100) */
-	unsigned long irq_count;    /* number of IRQ's since boot */
-	unsigned long irq_max_cr16; /* longest time to handle a single IRQ */
-	unsigned long cpuid;        /* aka slot_number or set to NO_PROC_ID */
-	unsigned long hpa;          /* Host Physical address */
-	unsigned long txn_addr;     /* MMIO addr of EIR or id_eid */
-#ifdef CONFIG_SMP
-	unsigned long pending_ipi;  /* bitmap of type ipi_message_type */
-	unsigned long ipi_count;    /* number ipi Interrupts */
-#endif
-	unsigned long bh_count;     /* number of times bh was invoked */
-	unsigned long prof_counter; /* per CPU profiling support */
-	unsigned long prof_multiplier;	/* per CPU profiling support */
-	unsigned long fp_rev;
-	unsigned long fp_model;
-	unsigned int state;
-	struct parisc_device *dev;
-	unsigned long loops_per_jiffy;
-};
-
-extern struct system_cpuinfo_parisc boot_cpu_data;
-extern struct cpuinfo_parisc cpu_data[NR_CPUS];
-#define current_cpu_data cpu_data[smp_processor_id()]
-
-#define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
-
-typedef struct {
-	int seg;  
-} mm_segment_t;
-
-#define ARCH_MIN_TASKALIGN	8
-
-struct thread_struct {
-	struct pt_regs regs;
-	unsigned long  task_size;
-	unsigned long  map_base;
-	unsigned long  flags;
-}; 
-
-/* Thread struct flags. */
-#define PARISC_UAC_NOPRINT	(1UL << 0)	/* see prctl and unaligned.c */
-#define PARISC_UAC_SIGBUS	(1UL << 1)
-#define PARISC_KERNEL_DEATH	(1UL << 31)	/* see die_if_kernel()... */
-
-#define PARISC_UAC_SHIFT	0
-#define PARISC_UAC_MASK		(PARISC_UAC_NOPRINT|PARISC_UAC_SIGBUS)
-
-#define SET_UNALIGN_CTL(task,value)                                       \
-        ({                                                                \
-        (task)->thread.flags = (((task)->thread.flags & ~PARISC_UAC_MASK) \
-                                | (((value) << PARISC_UAC_SHIFT) &        \
-                                   PARISC_UAC_MASK));                     \
-        0;                                                                \
-        })
-
-#define GET_UNALIGN_CTL(task,addr)                                        \
-        ({                                                                \
-        put_user(((task)->thread.flags & PARISC_UAC_MASK)                 \
-                 >> PARISC_UAC_SHIFT, (int __user *) (addr));             \
-        })
-
-#define INIT_THREAD { \
-	.regs = {	.gr	= { 0, }, \
-			.fr	= { 0, }, \
-			.sr	= { 0, }, \
-			.iasq	= { 0, }, \
-			.iaoq	= { 0, }, \
-			.cr27	= 0, \
-		}, \
-	.task_size	= DEFAULT_TASK_SIZE, \
-	.map_base	= DEFAULT_MAP_BASE, \
-	.flags		= 0 \
-	}
-
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
-unsigned long thread_saved_pc(struct task_struct *t);
-void show_trace(struct task_struct *task, unsigned long *stack);
-
-/*
- * Start user thread in another space.
- *
- * Note that we set both the iaoq and r31 to the new pc. When
- * the kernel initially calls execve it will return through an
- * rfi path that will use the values in the iaoq. The execve
- * syscall path will return through the gateway page, and
- * that uses r31 to branch to.
- *
- * For ELF we clear r23, because the dynamic linker uses it to pass
- * the address of the finalizer function.
- *
- * We also initialize sr3 to an illegal value (illegal for our
- * implementation, not for the architecture).
- */
-typedef unsigned int elf_caddr_t;
-
-#define start_thread_som(regs, new_pc, new_sp) do {	\
-	unsigned long *sp = (unsigned long *)new_sp;	\
-	__u32 spaceid = (__u32)current->mm->context;	\
-	unsigned long pc = (unsigned long)new_pc;	\
-	/* offset pc for priv. level */			\
-	pc |= 3;					\
-							\
-	set_fs(USER_DS);				\
-	regs->iasq[0] = spaceid;			\
-	regs->iasq[1] = spaceid;			\
-	regs->iaoq[0] = pc;				\
-	regs->iaoq[1] = pc + 4;                         \
-	regs->sr[2] = LINUX_GATEWAY_SPACE;              \
-	regs->sr[3] = 0xffff;				\
-	regs->sr[4] = spaceid;				\
-	regs->sr[5] = spaceid;				\
-	regs->sr[6] = spaceid;				\
-	regs->sr[7] = spaceid;				\
-	regs->gr[ 0] = USER_PSW;                        \
-	regs->gr[30] = ((new_sp)+63)&~63;		\
-	regs->gr[31] = pc;				\
-							\
-	get_user(regs->gr[26],&sp[0]);			\
-	get_user(regs->gr[25],&sp[-1]); 		\
-	get_user(regs->gr[24],&sp[-2]); 		\
-	get_user(regs->gr[23],&sp[-3]); 		\
-} while(0)
-
-/* The ELF abi wants things done a "wee bit" differently than
- * som does.  Supporting this behavior here avoids
- * having our own version of create_elf_tables.
- *
- * Oh, and yes, that is not a typo, we are really passing argc in r25
- * and argv in r24 (rather than r26 and r25).  This is because that's
- * where __libc_start_main wants them.
- *
- * Duplicated from dl-machine.h for the benefit of readers:
- *
- *  Our initial stack layout is rather different from everyone else's
- *  due to the unique PA-RISC ABI.  As far as I know it looks like
- *  this:
-
-   -----------------------------------  (user startup code creates this frame)
-   |         32 bytes of magic       |
-   |---------------------------------|
-   | 32 bytes argument/sp save area  |
-   |---------------------------------| (bprm->p)
-   |	    ELF auxiliary info	     |
-   |         (up to 28 words)        |
-   |---------------------------------|
-   |		   NULL		     |
-   |---------------------------------|
-   |	   Environment pointers	     |
-   |---------------------------------|
-   |		   NULL		     |
-   |---------------------------------|
-   |        Argument pointers        |
-   |---------------------------------| <- argv
-   |          argc (1 word)          |
-   |---------------------------------| <- bprm->exec (HACK!)
-   |         N bytes of slack        |
-   |---------------------------------|
-   |	filename passed to execve    |
-   |---------------------------------| (mm->env_end)
-   |           env strings           |
-   |---------------------------------| (mm->env_start, mm->arg_end)
-   |           arg strings           |
-   |---------------------------------|
-   | additional faked arg strings if |
-   | we're invoked via binfmt_script |
-   |---------------------------------| (mm->arg_start)
-   stack base is at TASK_SIZE - rlim_max.
-
-on downward growing arches, it looks like this:
-   stack base at TASK_SIZE
-   | filename passed to execve
-   | env strings
-   | arg strings
-   | faked arg strings
-   | slack
-   | ELF
-   | envps
-   | argvs
-   | argc
-
- *  The pleasant part of this is that if we need to skip arguments we
- *  can just decrement argc and move argv, because the stack pointer
- *  is utterly unrelated to the location of the environment and
- *  argument vectors.
- *
- * Note that the S/390 people took the easy way out and hacked their
- * GCC to make the stack grow downwards.
- *
- * Final Note: For entry from syscall, the W (wide) bit of the PSW
- * is stuffed into the lowest bit of the user sp (%r30), so we fill
- * it in here from the current->personality
- */
-
-#ifdef CONFIG_64BIT
-#define USER_WIDE_MODE	(!test_thread_flag(TIF_32BIT))
-#else
-#define USER_WIDE_MODE	0
-#endif
-
-#define start_thread(regs, new_pc, new_sp) do {		\
-	elf_addr_t *sp = (elf_addr_t *)new_sp;		\
-	__u32 spaceid = (__u32)current->mm->context;	\
-	elf_addr_t pc = (elf_addr_t)new_pc | 3;		\
-	elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1;	\
-							\
-	set_fs(USER_DS);				\
-	regs->iasq[0] = spaceid;			\
-	regs->iasq[1] = spaceid;			\
-	regs->iaoq[0] = pc;				\
-	regs->iaoq[1] = pc + 4;                         \
-	regs->sr[2] = LINUX_GATEWAY_SPACE;              \
-	regs->sr[3] = 0xffff;				\
-	regs->sr[4] = spaceid;				\
-	regs->sr[5] = spaceid;				\
-	regs->sr[6] = spaceid;				\
-	regs->sr[7] = spaceid;				\
-	regs->gr[ 0] = USER_PSW | (USER_WIDE_MODE ? PSW_W : 0); \
-	regs->fr[ 0] = 0LL;                            	\
-	regs->fr[ 1] = 0LL;                            	\
-	regs->fr[ 2] = 0LL;                            	\
-	regs->fr[ 3] = 0LL;                            	\
-	regs->gr[30] = (((unsigned long)sp + 63) &~ 63) | (USER_WIDE_MODE ? 1 : 0); \
-	regs->gr[31] = pc;				\
-							\
-	get_user(regs->gr[25], (argv - 1));		\
-	regs->gr[24] = (long) argv;			\
-	regs->gr[23] = 0;				\
-} while(0)
-
-struct task_struct;
-struct mm_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
-/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
-
-extern void map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm);
-
-extern unsigned long get_wchan(struct task_struct *p);
-
-#define KSTK_EIP(tsk)	((tsk)->thread.regs.iaoq[0])
-#define KSTK_ESP(tsk)	((tsk)->thread.regs.gr[30])
-
-#define cpu_relax()	barrier()
-
-/* Used as a macro to identify the combined VIPT/PIPT cached
- * CPUs which require a guarantee of coherency (no inequivalent
- * aliases with different data, whether clean or not) to operate */
-static inline int parisc_requires_coherency(void)
-{
-#ifdef CONFIG_PA8X00
-	return (boot_cpu_data.cpu_type == mako) ||
-		(boot_cpu_data.cpu_type == mako2);
-#else
-	return 0;
-#endif
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __ASM_PARISC_PROCESSOR_H */
diff --git a/include/asm-parisc/psw.h b/include/asm-parisc/psw.h
deleted file mode 100644
index 5a3e23c9ce63..000000000000
--- a/include/asm-parisc/psw.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef _PARISC_PSW_H
-
-
-#define	PSW_I	0x00000001
-#define	PSW_D	0x00000002
-#define	PSW_P	0x00000004
-#define	PSW_Q	0x00000008
-
-#define	PSW_R	0x00000010
-#define	PSW_F	0x00000020
-#define	PSW_G	0x00000040	/* PA1.x only */
-#define PSW_O	0x00000080	/* PA2.0 only */
-
-/* ssm/rsm instructions number PSW_W and PSW_E differently */
-#define PSW_SM_I	PSW_I	/* Enable External Interrupts */
-#define PSW_SM_D	PSW_D
-#define PSW_SM_P	PSW_P
-#define PSW_SM_Q	PSW_Q	/* Enable Interrupt State Collection */
-#define PSW_SM_R	PSW_R	/* Enable Recover Counter Trap */
-#define PSW_SM_W	0x200	/* PA2.0 only : Enable Wide Mode */
-
-#define PSW_SM_QUIET	PSW_SM_R+PSW_SM_Q+PSW_SM_P+PSW_SM_D+PSW_SM_I
-
-#define PSW_CB	0x0000ff00
-
-#define	PSW_M	0x00010000
-#define	PSW_V	0x00020000
-#define	PSW_C	0x00040000
-#define	PSW_B	0x00080000
-
-#define	PSW_X	0x00100000
-#define	PSW_N	0x00200000
-#define	PSW_L	0x00400000
-#define	PSW_H	0x00800000
-
-#define	PSW_T	0x01000000
-#define	PSW_S	0x02000000
-#define	PSW_E	0x04000000
-#define PSW_W	0x08000000	/* PA2.0 only */
-#define PSW_W_BIT       36      /* PA2.0 only */
-
-#define	PSW_Z	0x40000000	/* PA1.x only */
-#define	PSW_Y	0x80000000	/* PA1.x only */
-
-#ifdef CONFIG_64BIT
-#  define PSW_HI_CB 0x000000ff    /* PA2.0 only */
-#endif
-
-#ifdef CONFIG_64BIT
-#  define USER_PSW_HI_MASK	PSW_HI_CB
-#  define WIDE_PSW		PSW_W
-#else 
-#  define WIDE_PSW		0
-#endif
-
-/* Used when setting up for rfi */
-#define KERNEL_PSW    (WIDE_PSW | PSW_C | PSW_Q | PSW_P | PSW_D)
-#define REAL_MODE_PSW (WIDE_PSW | PSW_Q)
-#define USER_PSW_MASK (WIDE_PSW | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
-#define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
-
-#endif
diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h
deleted file mode 100644
index 3e94c5d85ff5..000000000000
--- a/include/asm-parisc/ptrace.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _PARISC_PTRACE_H
-#define _PARISC_PTRACE_H
-
-/* written by Philipp Rumpf, Copyright (C) 1999 SuSE GmbH Nuernberg
-** Copyright (C) 2000 Grant Grundler, Hewlett-Packard
-*/
-
-#include <linux/types.h>
-
-/* This struct defines the way the registers are stored on the 
- * stack during a system call.
- *
- * N.B. gdb/strace care about the size and offsets within this
- * structure. If you change things, you may break object compatibility
- * for those applications.
- */
-
-struct pt_regs {
-	unsigned long gr[32];	/* PSW is in gr[0] */
-	__u64 fr[32];
-	unsigned long sr[ 8];
-	unsigned long iasq[2];
-	unsigned long iaoq[2];
-	unsigned long cr27;
-	unsigned long pad0;     /* available for other uses */
-	unsigned long orig_r28;
-	unsigned long ksp;
-	unsigned long kpc;
-	unsigned long sar;	/* CR11 */
-	unsigned long iir;	/* CR19 */
-	unsigned long isr;	/* CR20 */
-	unsigned long ior;	/* CR21 */
-	unsigned long ipsw;	/* CR22 */
-};
-
-/*
- * The numbers chosen here are somewhat arbitrary but absolutely MUST
- * not overlap with any of the number assigned in <linux/ptrace.h>.
- *
- * These ones are taken from IA-64 on the assumption that theirs are
- * the most correct (and we also want to support PTRACE_SINGLEBLOCK
- * since we have taken branch traps too)
- */
-#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
-
-#ifdef __KERNEL__
-
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
-
-/* XXX should we use iaoq[1] or iaoq[0] ? */
-#define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
-#define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
-#define instruction_pointer(regs)	((regs)->iaoq[0] & ~3)
-unsigned long profile_pc(struct pt_regs *);
-extern void show_regs(struct pt_regs *);
-#endif
-
-#endif
diff --git a/include/asm-parisc/real.h b/include/asm-parisc/real.h
deleted file mode 100644
index 82acb25db395..000000000000
--- a/include/asm-parisc/real.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef _PARISC_REAL_H
-#define _PARISC_REAL_H
-
-
-#endif
diff --git a/include/asm-parisc/resource.h b/include/asm-parisc/resource.h
deleted file mode 100644
index 8b06343b62ed..000000000000
--- a/include/asm-parisc/resource.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_PARISC_RESOURCE_H
-#define _ASM_PARISC_RESOURCE_H
-
-#define _STK_LIM_MAX	10 * _STK_LIM
-#include <asm-generic/resource.h>
-
-#endif
diff --git a/include/asm-parisc/ropes.h b/include/asm-parisc/ropes.h
deleted file mode 100644
index 09f51d5ab57c..000000000000
--- a/include/asm-parisc/ropes.h
+++ /dev/null
@@ -1,322 +0,0 @@
-#ifndef _ASM_PARISC_ROPES_H_
-#define _ASM_PARISC_ROPES_H_
-
-#include <asm/parisc-device.h>
-
-#ifdef CONFIG_64BIT
-/* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
-#define ZX1_SUPPORT
-#endif
-
-#ifdef CONFIG_PROC_FS
-/* depends on proc fs support. But costs CPU performance */
-#undef SBA_COLLECT_STATS
-#endif
-
-/*
-** The number of pdir entries to "free" before issuing
-** a read to PCOM register to flush out PCOM writes.
-** Interacts with allocation granularity (ie 4 or 8 entries
-** allocated and free'd/purged at a time might make this
-** less interesting).
-*/
-#define DELAYED_RESOURCE_CNT	16
-
-#define MAX_IOC		2	/* per Ike. Pluto/Astro only have 1. */
-#define ROPES_PER_IOC	8	/* per Ike half or Pluto/Astro */
-
-struct ioc {
-	void __iomem	*ioc_hpa;	/* I/O MMU base address */
-	char		*res_map;	/* resource map, bit == pdir entry */
-	u64		*pdir_base;	/* physical base address */
-	unsigned long	ibase;		/* pdir IOV Space base - shared w/lba_pci */
-	unsigned long	imask;		/* pdir IOV Space mask - shared w/lba_pci */
-#ifdef ZX1_SUPPORT
-	unsigned long	iovp_mask;	/* help convert IOVA to IOVP */
-#endif
-	unsigned long	*res_hint;	/* next avail IOVP - circular search */
-	spinlock_t	res_lock;
-	unsigned int	res_bitshift;	/* from the LEFT! */
-	unsigned int	res_size;	/* size of resource map in bytes */
-#ifdef SBA_HINT_SUPPORT
-/* FIXME : DMA HINTs not used */
-	unsigned long	hint_mask_pdir; /* bits used for DMA hints */
-	unsigned int	hint_shift_pdir;
-#endif
-#if DELAYED_RESOURCE_CNT > 0
-	int		saved_cnt;
-	struct sba_dma_pair {
-			dma_addr_t	iova;
-			size_t		size;
-        } saved[DELAYED_RESOURCE_CNT];
-#endif
-
-#ifdef SBA_COLLECT_STATS
-#define SBA_SEARCH_SAMPLE	0x100
-	unsigned long	avg_search[SBA_SEARCH_SAMPLE];
-	unsigned long	avg_idx;	/* current index into avg_search */
-	unsigned long	used_pages;
-	unsigned long	msingle_calls;
-	unsigned long	msingle_pages;
-	unsigned long	msg_calls;
-	unsigned long	msg_pages;
-	unsigned long	usingle_calls;
-	unsigned long	usingle_pages;
-	unsigned long	usg_calls;
-	unsigned long	usg_pages;
-#endif
-        /* STUFF We don't need in performance path */
-	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
-};
-
-struct sba_device {
-	struct sba_device	*next;  /* list of SBA's in system */
-	struct parisc_device	*dev;   /* dev found in bus walk */
-	const char		*name;
-	void __iomem		*sba_hpa; /* base address */
-	spinlock_t		sba_lock;
-	unsigned int		flags;  /* state/functionality enabled */
-	unsigned int		hw_rev;  /* HW revision of chip */
-
-	struct resource		chip_resv; /* MMIO reserved for chip */
-	struct resource		iommu_resv; /* MMIO reserved for iommu */
-
-	unsigned int		num_ioc;  /* number of on-board IOC's */
-	struct ioc		ioc[MAX_IOC];
-};
-
-#define ASTRO_RUNWAY_PORT	0x582
-#define IKE_MERCED_PORT		0x803
-#define REO_MERCED_PORT		0x804
-#define REOG_MERCED_PORT	0x805
-#define PLUTO_MCKINLEY_PORT	0x880
-
-static inline int IS_ASTRO(struct parisc_device *d) {
-	return d->id.hversion == ASTRO_RUNWAY_PORT;
-}
-
-static inline int IS_IKE(struct parisc_device *d) {
-	return d->id.hversion == IKE_MERCED_PORT;
-}
-
-static inline int IS_PLUTO(struct parisc_device *d) {
-	return d->id.hversion == PLUTO_MCKINLEY_PORT;
-}
-
-#define PLUTO_IOVA_BASE	(1UL*1024*1024*1024)	/* 1GB */
-#define PLUTO_IOVA_SIZE	(1UL*1024*1024*1024)	/* 1GB */
-#define PLUTO_GART_SIZE	(PLUTO_IOVA_SIZE / 2)
-
-#define SBA_PDIR_VALID_BIT	0x8000000000000000ULL
-
-#define SBA_AGPGART_COOKIE	0x0000badbadc0ffeeULL
-
-#define SBA_FUNC_ID	0x0000	/* function id */
-#define SBA_FCLASS	0x0008	/* function class, bist, header, rev... */
-
-#define SBA_FUNC_SIZE 4096   /* SBA configuration function reg set */
-
-#define ASTRO_IOC_OFFSET	(32 * SBA_FUNC_SIZE)
-#define PLUTO_IOC_OFFSET	(1 * SBA_FUNC_SIZE)
-/* Ike's IOC's occupy functions 2 and 3 */
-#define IKE_IOC_OFFSET(p)	((p+2) * SBA_FUNC_SIZE)
-
-#define IOC_CTRL          0x8	/* IOC_CTRL offset */
-#define IOC_CTRL_TC       (1 << 0) /* TOC Enable */
-#define IOC_CTRL_CE       (1 << 1) /* Coalesce Enable */
-#define IOC_CTRL_DE       (1 << 2) /* Dillon Enable */
-#define IOC_CTRL_RM       (1 << 8) /* Real Mode */
-#define IOC_CTRL_NC       (1 << 9) /* Non Coherent Mode */
-#define IOC_CTRL_D4       (1 << 11) /* Disable 4-byte coalescing */
-#define IOC_CTRL_DD       (1 << 13) /* Disable distr. LMMIO range coalescing */
-
-/*
-** Offsets into MBIB (Function 0 on Ike and hopefully Astro)
-** Firmware programs this stuff. Don't touch it.
-*/
-#define LMMIO_DIRECT0_BASE  0x300
-#define LMMIO_DIRECT0_MASK  0x308
-#define LMMIO_DIRECT0_ROUTE 0x310
-
-#define LMMIO_DIST_BASE  0x360
-#define LMMIO_DIST_MASK  0x368
-#define LMMIO_DIST_ROUTE 0x370
-
-#define IOS_DIST_BASE	0x390
-#define IOS_DIST_MASK	0x398
-#define IOS_DIST_ROUTE	0x3A0
-
-#define IOS_DIRECT_BASE	0x3C0
-#define IOS_DIRECT_MASK	0x3C8
-#define IOS_DIRECT_ROUTE 0x3D0
-
-/*
-** Offsets into I/O TLB (Function 2 and 3 on Ike)
-*/
-#define ROPE0_CTL	0x200  /* "regbus pci0" */
-#define ROPE1_CTL	0x208
-#define ROPE2_CTL	0x210
-#define ROPE3_CTL	0x218
-#define ROPE4_CTL	0x220
-#define ROPE5_CTL	0x228
-#define ROPE6_CTL	0x230
-#define ROPE7_CTL	0x238
-
-#define IOC_ROPE0_CFG	0x500	/* pluto only */
-#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */
-
-#define HF_ENABLE	0x40
-
-#define IOC_IBASE	0x300	/* IO TLB */
-#define IOC_IMASK	0x308
-#define IOC_PCOM	0x310
-#define IOC_TCNFG	0x318
-#define IOC_PDIR_BASE	0x320
-
-/*
-** IOC supports 4/8/16/64KB page sizes (see TCNFG register)
-** It's safer (avoid memory corruption) to keep DMA page mappings
-** equivalently sized to VM PAGE_SIZE.
-**
-** We really can't avoid generating a new mapping for each
-** page since the Virtual Coherence Index has to be generated
-** and updated for each page.
-**
-** PAGE_SIZE could be greater than IOVP_SIZE. But not the inverse.
-*/
-#define IOVP_SIZE	PAGE_SIZE
-#define IOVP_SHIFT	PAGE_SHIFT
-#define IOVP_MASK	PAGE_MASK
-
-#define SBA_PERF_CFG	0x708	/* Performance Counter stuff */
-#define SBA_PERF_MASK1	0x718
-#define SBA_PERF_MASK2	0x730
-
-/*
-** Offsets into PCI Performance Counters (functions 12 and 13)
-** Controlled by PERF registers in function 2 & 3 respectively.
-*/
-#define SBA_PERF_CNT1	0x200
-#define SBA_PERF_CNT2	0x208
-#define SBA_PERF_CNT3	0x210
-
-/*
-** lba_device: Per instance Elroy data structure
-*/
-struct lba_device {
-	struct pci_hba_data	hba;
-
-	spinlock_t		lba_lock;
-	void			*iosapic_obj;
-
-#ifdef CONFIG_64BIT
-	void __iomem		*iop_base;	/* PA_VIEW - for IO port accessor funcs */
-#endif
-
-	int			flags;		/* state/functionality enabled */
-	int			hw_rev;		/* HW revision of chip */
-};
-
-#define ELROY_HVERS		0x782
-#define MERCURY_HVERS		0x783
-#define QUICKSILVER_HVERS	0x784
-
-static inline int IS_ELROY(struct parisc_device *d) {
-	return (d->id.hversion == ELROY_HVERS);
-}
-
-static inline int IS_MERCURY(struct parisc_device *d) {
-	return (d->id.hversion == MERCURY_HVERS);
-}
-
-static inline int IS_QUICKSILVER(struct parisc_device *d) {
-	return (d->id.hversion == QUICKSILVER_HVERS);
-}
-
-static inline int agp_mode_mercury(void __iomem *hpa) {
-	u64 bus_mode;
-
-	bus_mode = readl(hpa + 0x0620);
-	if (bus_mode & 1)
-		return 1;
-
-	return 0;
-}
-
-/*
-** I/O SAPIC init function
-** Caller knows where an I/O SAPIC is. LBA has an integrated I/O SAPIC.
-** Call setup as part of per instance initialization.
-** (ie *not* init_module() function unless only one is present.)
-** fixup_irq is to initialize PCI IRQ line support and
-** virtualize pcidev->irq value. To be called by pci_fixup_bus().
-*/
-extern void *iosapic_register(unsigned long hpa);
-extern int iosapic_fixup_irq(void *obj, struct pci_dev *pcidev);
-
-#define LBA_FUNC_ID	0x0000	/* function id */
-#define LBA_FCLASS	0x0008	/* function class, bist, header, rev... */
-#define LBA_CAPABLE	0x0030	/* capabilities register */
-
-#define LBA_PCI_CFG_ADDR	0x0040	/* poke CFG address here */
-#define LBA_PCI_CFG_DATA	0x0048	/* read or write data here */
-
-#define LBA_PMC_MTLT	0x0050	/* Firmware sets this - read only. */
-#define LBA_FW_SCRATCH	0x0058	/* Firmware writes the PCI bus number here. */
-#define LBA_ERROR_ADDR	0x0070	/* On error, address gets logged here */
-
-#define LBA_ARB_MASK	0x0080	/* bit 0 enable arbitration. PAT/PDC enables */
-#define LBA_ARB_PRI	0x0088	/* firmware sets this. */
-#define LBA_ARB_MODE	0x0090	/* firmware sets this. */
-#define LBA_ARB_MTLT	0x0098	/* firmware sets this. */
-
-#define LBA_MOD_ID	0x0100	/* Module ID. PDC_PAT_CELL reports 4 */
-
-#define LBA_STAT_CTL	0x0108	/* Status & Control */
-#define   LBA_BUS_RESET		0x01	/*  Deassert PCI Bus Reset Signal */
-#define   CLEAR_ERRLOG		0x10	/*  "Clear Error Log" cmd */
-#define   CLEAR_ERRLOG_ENABLE	0x20	/*  "Clear Error Log" Enable */
-#define   HF_ENABLE	0x40	/*    enable HF mode (default is -1 mode) */
-
-#define LBA_LMMIO_BASE	0x0200	/* < 4GB I/O address range */
-#define LBA_LMMIO_MASK	0x0208
-
-#define LBA_GMMIO_BASE	0x0210	/* > 4GB I/O address range */
-#define LBA_GMMIO_MASK	0x0218
-
-#define LBA_WLMMIO_BASE	0x0220	/* All < 4GB ranges under the same *SBA* */
-#define LBA_WLMMIO_MASK	0x0228
-
-#define LBA_WGMMIO_BASE	0x0230	/* All > 4GB ranges under the same *SBA* */
-#define LBA_WGMMIO_MASK	0x0238
-
-#define LBA_IOS_BASE	0x0240	/* I/O port space for this LBA */
-#define LBA_IOS_MASK	0x0248
-
-#define LBA_ELMMIO_BASE	0x0250	/* Extra LMMIO range */
-#define LBA_ELMMIO_MASK	0x0258
-
-#define LBA_EIOS_BASE	0x0260	/* Extra I/O port space */
-#define LBA_EIOS_MASK	0x0268
-
-#define LBA_GLOBAL_MASK	0x0270	/* Mercury only: Global Address Mask */
-#define LBA_DMA_CTL	0x0278	/* firmware sets this */
-
-#define LBA_IBASE	0x0300	/* SBA DMA support */
-#define LBA_IMASK	0x0308
-
-/* FIXME: ignore DMA Hint stuff until we can measure performance */
-#define LBA_HINT_CFG	0x0310
-#define LBA_HINT_BASE	0x0380	/* 14 registers at every 8 bytes. */
-
-#define LBA_BUS_MODE	0x0620
-
-/* ERROR regs are needed for config cycle kluges */
-#define LBA_ERROR_CONFIG 0x0680
-#define     LBA_SMART_MODE 0x20
-#define LBA_ERROR_STATUS 0x0688
-#define LBA_ROPE_CTL     0x06A0
-
-#define LBA_IOSAPIC_BASE	0x800 /* Offset of IRQ logic */
-
-#endif /*_ASM_PARISC_ROPES_H_*/
diff --git a/include/asm-parisc/rt_sigframe.h b/include/asm-parisc/rt_sigframe.h
deleted file mode 100644
index f0dd3b30f6c4..000000000000
--- a/include/asm-parisc/rt_sigframe.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _ASM_PARISC_RT_SIGFRAME_H
-#define _ASM_PARISC_RT_SIGFRAME_H
-
-#define SIGRETURN_TRAMP 4
-#define SIGRESTARTBLOCK_TRAMP 5 
-#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
-
-struct rt_sigframe {
-	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
-	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
-		trampoline we left on the stack (we were bad and didn't 
-		change sp so we could run really fast.) */
-	unsigned int tramp[TRAMP_SIZE];
-	struct siginfo info;
-	struct ucontext uc;
-};
-
-#define	SIGFRAME		128
-#define FUNCTIONCALLFRAME	96
-#define PARISC_RT_SIGFRAME_SIZE					\
-	(((sizeof(struct rt_sigframe) + FUNCTIONCALLFRAME) + SIGFRAME) & -SIGFRAME)
-
-#endif
diff --git a/include/asm-parisc/rtc.h b/include/asm-parisc/rtc.h
deleted file mode 100644
index 099d641a42c2..000000000000
--- a/include/asm-parisc/rtc.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* 
- * include/asm-parisc/rtc.h
- *
- * Copyright 2002 Randolph CHung <tausq@debian.org>
- *
- * Based on: include/asm-ppc/rtc.h and the genrtc driver in the
- * 2.4 parisc linux tree
- */
-
-#ifndef __ASM_RTC_H__
-#define __ASM_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/pdc.h>
-
-#define SECS_PER_HOUR   (60 * 60)
-#define SECS_PER_DAY    (SECS_PER_HOUR * 24)
-
-
-#define RTC_PIE 0x40		/* periodic interrupt enable */
-#define RTC_AIE 0x20		/* alarm interrupt enable */
-#define RTC_UIE 0x10		/* update-finished interrupt enable */
-
-#define RTC_BATT_BAD 0x100	/* battery bad */
-
-/* some dummy definitions */
-#define RTC_SQWE 0x08		/* enable square-wave output */
-#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
-#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
-
-# define __isleap(year) \
-  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-
-/* How many days come before each month (0-12).  */
-static const unsigned short int __mon_yday[2][13] =
-{
-	/* Normal years.  */
-	{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
-	/* Leap years.  */
-	{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
-};
-
-static inline unsigned int get_rtc_time(struct rtc_time *wtime)
-{
-	struct pdc_tod tod_data;
-	long int days, rem, y;
-	const unsigned short int *ip;
-
-	memset(wtime, 0, sizeof(*wtime));
-	if (pdc_tod_read(&tod_data) < 0)
-		return RTC_24H | RTC_BATT_BAD;
-
-	// most of the remainder of this function is:
-//	Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc.
-//	This was originally a part of the GNU C Library.
-//      It is distributed under the GPL, and was swiped from offtime.c
-
-
-	days = tod_data.tod_sec / SECS_PER_DAY;
-	rem = tod_data.tod_sec % SECS_PER_DAY;
-
-	wtime->tm_hour = rem / SECS_PER_HOUR;
-	rem %= SECS_PER_HOUR;
-	wtime->tm_min = rem / 60;
-	wtime->tm_sec = rem % 60;
-
-	y = 1970;
-
-#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-
-	while (days < 0 || days >= (__isleap (y) ? 366 : 365))
-	{
-		/* Guess a corrected year, assuming 365 days per year.  */
-		long int yg = y + days / 365 - (days % 365 < 0);
-
-		/* Adjust DAYS and Y to match the guessed year.  */
-		days -= ((yg - y) * 365
-			 + LEAPS_THRU_END_OF (yg - 1)
-			 - LEAPS_THRU_END_OF (y - 1));
-		y = yg;
-	}
-	wtime->tm_year = y - 1900;
-
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < (long int) ip[y]; --y)
-		continue;
-	days -= ip[y];
-	wtime->tm_mon = y;
-	wtime->tm_mday = days + 1;
-
-	return RTC_24H;
-}
-
-static int set_rtc_time(struct rtc_time *wtime)
-{
-	u_int32_t secs;
-
-	secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday, 
-		      wtime->tm_hour, wtime->tm_min, wtime->tm_sec);
-
-	if(pdc_tod_set(secs, 0) < 0)
-		return -1;
-	else
-		return 0;
-
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
-	struct rtc_time h;
-
-	get_rtc_time(&h);
-	return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_RTC_H__ */
diff --git a/include/asm-parisc/runway.h b/include/asm-parisc/runway.h
deleted file mode 100644
index 5bea02da7e22..000000000000
--- a/include/asm-parisc/runway.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef ASM_PARISC_RUNWAY_H
-#define ASM_PARISC_RUNWAY_H
-#ifdef __KERNEL__
-
-/* declared in arch/parisc/kernel/setup.c */
-extern struct proc_dir_entry * proc_runway_root;
-
-#define RUNWAY_STATUS	0x10
-#define RUNWAY_DEBUG	0x40
-
-#endif /* __KERNEL__ */
-#endif /* ASM_PARISC_RUNWAY_H */
diff --git a/include/asm-parisc/scatterlist.h b/include/asm-parisc/scatterlist.h
deleted file mode 100644
index 62269b31ebf4..000000000000
--- a/include/asm-parisc/scatterlist.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _ASM_PARISC_SCATTERLIST_H
-#define _ASM_PARISC_SCATTERLIST_H
-
-#include <asm/page.h>
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-	unsigned long sg_magic;
-#endif
-	unsigned long page_link;
-	unsigned int offset;
-
-	unsigned int length;
-
-	/* an IOVA can be 64-bits on some PA-Risc platforms. */
-	dma_addr_t iova;	/* I/O Virtual Address */
-	__u32      iova_length; /* bytes mapped */
-};
-
-#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
-#define sg_dma_address(sg) ((sg)->iova)
-#define sg_dma_len(sg)     ((sg)->iova_length)
-
-#define ISA_DMA_THRESHOLD (~0UL)
-
-#endif /* _ASM_PARISC_SCATTERLIST_H */
diff --git a/include/asm-parisc/sections.h b/include/asm-parisc/sections.h
deleted file mode 100644
index 9d13c3507ad6..000000000000
--- a/include/asm-parisc/sections.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _PARISC_SECTIONS_H
-#define _PARISC_SECTIONS_H
-
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
-#ifdef CONFIG_64BIT
-#undef dereference_function_descriptor
-void *dereference_function_descriptor(void *);
-#endif
-
-#endif
diff --git a/include/asm-parisc/segment.h b/include/asm-parisc/segment.h
deleted file mode 100644
index 26794ddb6524..000000000000
--- a/include/asm-parisc/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PARISC_SEGMENT_H
-#define __PARISC_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/include/asm-parisc/sembuf.h b/include/asm-parisc/sembuf.h
deleted file mode 100644
index 1e59ffd3bd1e..000000000000
--- a/include/asm-parisc/sembuf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _PARISC_SEMBUF_H
-#define _PARISC_SEMBUF_H
-
-/* 
- * The semid64_ds structure for parisc architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-#ifndef CONFIG_64BIT
-	unsigned int	__pad1;
-#endif
-	__kernel_time_t	sem_otime;		/* last semop time */
-#ifndef CONFIG_64BIT
-	unsigned int	__pad2;
-#endif
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned int	sem_nsems;		/* no. of semaphores in array */
-	unsigned int	__unused1;
-	unsigned int	__unused2;
-};
-
-#endif /* _PARISC_SEMBUF_H */
diff --git a/include/asm-parisc/serial.h b/include/asm-parisc/serial.h
deleted file mode 100644
index d7e3cc60dbc3..000000000000
--- a/include/asm-parisc/serial.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * include/asm-parisc/serial.h
- */
-
-/*
- * This is used for 16550-compatible UARTs
- */
-#define BASE_BAUD ( 1843200 / 16 )
-
-#define SERIAL_PORT_DFNS
diff --git a/include/asm-parisc/setup.h b/include/asm-parisc/setup.h
deleted file mode 100644
index 7da2e5b8747e..000000000000
--- a/include/asm-parisc/setup.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PARISC_SETUP_H
-#define _PARISC_SETUP_H
-
-#define COMMAND_LINE_SIZE	1024
-
-#endif /* _PARISC_SETUP_H */
diff --git a/include/asm-parisc/shmbuf.h b/include/asm-parisc/shmbuf.h
deleted file mode 100644
index 0a3eada1863b..000000000000
--- a/include/asm-parisc/shmbuf.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _PARISC_SHMBUF_H
-#define _PARISC_SHMBUF_H
-
-/* 
- * The shmid64_ds structure for parisc architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-#ifndef CONFIG_64BIT
-	unsigned int		__pad1;
-#endif
-	__kernel_time_t		shm_atime;	/* last attach time */
-#ifndef CONFIG_64BIT
-	unsigned int		__pad2;
-#endif
-	__kernel_time_t		shm_dtime;	/* last detach time */
-#ifndef CONFIG_64BIT
-	unsigned int		__pad3;
-#endif
-	__kernel_time_t		shm_ctime;	/* last change time */
-#ifndef CONFIG_64BIT
-	unsigned int		__pad4;
-#endif
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned int		shm_nattch;	/* no. of current attaches */
-	unsigned int		__unused1;
-	unsigned int		__unused2;
-};
-
-#ifdef CONFIG_64BIT
-/* The 'unsigned int' (formerly 'unsigned long') data types below will
- * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on
- * a wide kernel, but if some of these values are meant to contain pointers
- * they may need to be 'long long' instead. -PB XXX FIXME
- */
-#endif
-struct shminfo64 {
-	unsigned int	shmmax;
-	unsigned int	shmmin;
-	unsigned int	shmmni;
-	unsigned int	shmseg;
-	unsigned int	shmall;
-	unsigned int	__unused1;
-	unsigned int	__unused2;
-	unsigned int	__unused3;
-	unsigned int	__unused4;
-};
-
-#endif /* _PARISC_SHMBUF_H */
diff --git a/include/asm-parisc/shmparam.h b/include/asm-parisc/shmparam.h
deleted file mode 100644
index 628ddc22faa8..000000000000
--- a/include/asm-parisc/shmparam.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASMPARISC_SHMPARAM_H
-#define _ASMPARISC_SHMPARAM_H
-
-#define __ARCH_FORCE_SHMLBA 	1
-
-#define SHMLBA 0x00400000   /* attach addr needs to be 4 Mb aligned */
-
-#endif /* _ASMPARISC_SHMPARAM_H */
diff --git a/include/asm-parisc/sigcontext.h b/include/asm-parisc/sigcontext.h
deleted file mode 100644
index 27ef31bb3b6e..000000000000
--- a/include/asm-parisc/sigcontext.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASMPARISC_SIGCONTEXT_H
-#define _ASMPARISC_SIGCONTEXT_H
-
-#define PARISC_SC_FLAG_ONSTACK 1<<0
-#define PARISC_SC_FLAG_IN_SYSCALL 1<<1
-
-/* We will add more stuff here as it becomes necessary, until we know
-   it works. */
-struct sigcontext {
-	unsigned long sc_flags;
-
-	unsigned long sc_gr[32]; /* PSW in sc_gr[0] */
-	unsigned long long sc_fr[32]; /* FIXME, do we need other state info? */
-	unsigned long sc_iasq[2];
-	unsigned long sc_iaoq[2];
-	unsigned long sc_sar; /* cr11 */
-};
-
-
-#endif
diff --git a/include/asm-parisc/siginfo.h b/include/asm-parisc/siginfo.h
deleted file mode 100644
index d4909f55fe35..000000000000
--- a/include/asm-parisc/siginfo.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _PARISC_SIGINFO_H
-#define _PARISC_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
-#undef NSIGTRAP
-#define NSIGTRAP	4
-
-#endif
diff --git a/include/asm-parisc/signal.h b/include/asm-parisc/signal.h
deleted file mode 100644
index c20356375d1d..000000000000
--- a/include/asm-parisc/signal.h
+++ /dev/null
@@ -1,153 +0,0 @@
-#ifndef _ASM_PARISC_SIGNAL_H
-#define _ASM_PARISC_SIGNAL_H
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGEMT		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGBUS		10
-#define SIGSEGV		11
-#define SIGSYS		12 /* Linux doesn't use this */
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGUSR1		16
-#define SIGUSR2		17
-#define SIGCHLD		18
-#define SIGPWR		19
-#define SIGVTALRM	20
-#define SIGPROF		21
-#define SIGIO		22
-#define SIGPOLL		SIGIO
-#define SIGWINCH	23
-#define SIGSTOP		24
-#define SIGTSTP		25
-#define SIGCONT		26
-#define SIGTTIN		27
-#define SIGTTOU		28
-#define SIGURG		29
-#define SIGLOST		30 /* Linux doesn't use this either */
-#define	SIGUNUSED	31
-#define SIGRESERVE	SIGUNUSED
-
-#define SIGXCPU		33
-#define SIGXFSZ		34
-#define SIGSTKFLT	36
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	37
-#define SIGRTMAX	_NSIG /* it's 44 under HP/UX */
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_ONSTACK	0x00000001
-#define SA_RESETHAND	0x00000004
-#define SA_NOCLDSTOP	0x00000008
-#define SA_SIGINFO	0x00000010
-#define SA_NODEFER	0x00000020
-#define SA_RESTART	0x00000040
-#define SA_NOCLDWAIT	0x00000080
-#define _SA_SIGGFAULT	0x00000100 /* HPUX */
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000 /* obsolete -- ignored */
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#ifdef __KERNEL__
-
-#define _NSIG		64
-/* bits-per-word, where word apparently means 'long' not 'int' */
-#define _NSIG_BPW	BITS_PER_LONG
-#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
-
-#endif /* __KERNEL__ */
-
-#define SIG_BLOCK          0	/* for blocking signals */
-#define SIG_UNBLOCK        1	/* for unblocking signals */
-#define SIG_SETMASK        2	/* for setting the signal mask */
-
-#define SIG_DFL	((__sighandler_t)0)	/* default signal handling */
-#define SIG_IGN	((__sighandler_t)1)	/* ignore signal */
-#define SIG_ERR	((__sighandler_t)-1)	/* error return from signal */
-
-# ifndef __ASSEMBLY__
-
-#  include <linux/types.h>
-
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-/* Type of a signal handler.  */
-#ifdef CONFIG_64BIT
-/* function pointers on 64-bit parisc are pointers to little structs and the
- * compiler doesn't support code which changes or tests the address of
- * the function in the little struct.  This is really ugly -PB
- */
-typedef char __user *__sighandler_t;
-#else
-typedef void __signalfn_t(int);
-typedef __signalfn_t __user *__sighandler_t;
-#endif
-
-typedef struct sigaltstack {
-	void __user *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
-
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-
-typedef unsigned long old_sigset_t;		/* at least 32 bits */
-
-typedef struct {
-	/* next_signal() assumes this is a long - no choice */
-	unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
-struct k_sigaction {
-	struct sigaction sa;
-};
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#include <asm/sigcontext.h>
-
-#endif /* __KERNEL__ */
-#endif /* !__ASSEMBLY */
-#endif /* _ASM_PARISC_SIGNAL_H */
diff --git a/include/asm-parisc/smp.h b/include/asm-parisc/smp.h
deleted file mode 100644
index 398cdbaf4e54..000000000000
--- a/include/asm-parisc/smp.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-
-#if defined(CONFIG_SMP)
-
-/* Page Zero Location PDC will look for the address to branch to when we poke
-** slave CPUs still in "Icache loop".
-*/
-#define PDC_OS_BOOT_RENDEZVOUS     0x10
-#define PDC_OS_BOOT_RENDEZVOUS_HI  0x28
-
-#ifndef ASSEMBLY
-#include <linux/bitops.h>
-#include <linux/threads.h>	/* for NR_CPUS */
-#include <linux/cpumask.h>
-typedef unsigned long address_t;
-
-extern cpumask_t cpu_online_map;
-
-
-/*
- *	Private routines/data
- *
- *	physical and logical are equivalent until we support CPU hotplug.
- */
-#define cpu_number_map(cpu)	(cpu)
-#define cpu_logical_map(cpu)	(cpu)
-
-extern void smp_send_reschedule(int cpu);
-extern void smp_send_all_nop(void);
-
-extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
-
-#endif /* !ASSEMBLY */
-
-/*
- *	This magic constant controls our willingness to transfer
- *      a process across CPUs. Such a transfer incurs cache and tlb
- *      misses. The current value is inherited from i386. Still needs
- *      to be tuned for parisc.
- */
- 
-#define PROC_CHANGE_PENALTY	15		/* Schedule penalty */
-
-extern unsigned long cpu_present_mask;
-
-#define raw_smp_processor_id()	(current_thread_info()->cpu)
-
-#else /* CONFIG_SMP */
-
-static inline void smp_send_all_nop(void) { return; }
-
-#endif
-
-#define NO_PROC_ID		0xFF		/* No processor magic marker */
-#define ANY_PROC_ID		0xFF		/* Any processor magic marker */
-static inline int __cpu_disable (void) {
-  return 0;
-}
-static inline void __cpu_die (unsigned int cpu) {
-  while(1)
-    ;
-}
-extern int __cpu_up (unsigned int cpu);
-
-#endif /*  __ASM_SMP_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
deleted file mode 100644
index fba402c95ac2..000000000000
--- a/include/asm-parisc/socket.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
-
-#include <asm/sockios.h>
-
-/* For setsockopt(2) */
-#define SOL_SOCKET	0xffff
-
-#define SO_DEBUG	0x0001
-#define SO_REUSEADDR	0x0004
-#define SO_KEEPALIVE	0x0008
-#define SO_DONTROUTE	0x0010
-#define SO_BROADCAST	0x0020
-#define SO_LINGER	0x0080
-#define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
-#define SO_SNDBUF	0x1001
-#define SO_RCVBUF	0x1002
-#define SO_SNDBUFFORCE	0x100a
-#define SO_RCVBUFFORCE	0x100b
-#define SO_SNDLOWAT	0x1003
-#define SO_RCVLOWAT	0x1004
-#define SO_SNDTIMEO	0x1005
-#define SO_RCVTIMEO	0x1006
-#define SO_ERROR	0x1007
-#define SO_TYPE		0x1008
-#define SO_PEERNAME	0x2000
-
-#define SO_NO_CHECK	0x400b
-#define SO_PRIORITY	0x400c
-#define SO_BSDCOMPAT	0x400e
-#define SO_PASSCRED	0x4010
-#define SO_PEERCRED	0x4011
-#define SO_TIMESTAMP	0x4012
-#define SCM_TIMESTAMP	SO_TIMESTAMP
-#define SO_TIMESTAMPNS	0x4013
-#define SCM_TIMESTAMPNS	SO_TIMESTAMPNS
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION		0x4016
-#define SO_SECURITY_ENCRYPTION_TRANSPORT	0x4017
-#define SO_SECURITY_ENCRYPTION_NETWORK		0x4018
-
-#define SO_BINDTODEVICE	0x4019
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER        0x401a
-#define SO_DETACH_FILTER        0x401b
-
-#define SO_ACCEPTCONN		0x401c
-
-#define SO_PEERSEC		0x401d
-#define SO_PASSSEC		0x401e
-
-#define SO_MARK			0x401f
-
-/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
- * have to define SOCK_NONBLOCK to a different value here.
- */
-#define SOCK_NONBLOCK   0x40000000
-
-#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-parisc/sockios.h b/include/asm-parisc/sockios.h
deleted file mode 100644
index dabfbc7483f6..000000000000
--- a/include/asm-parisc/sockios.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ARCH_PARISC_SOCKIOS__
-#define __ARCH_PARISC_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 	0x8901
-#define SIOCSPGRP	0x8902
-#define FIOGETOWN	0x8903
-#define SIOCGPGRP	0x8904
-#define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
-#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
-
-#endif
diff --git a/include/asm-parisc/spinlock.h b/include/asm-parisc/spinlock.h
deleted file mode 100644
index f3d2090a18dc..000000000000
--- a/include/asm-parisc/spinlock.h
+++ /dev/null
@@ -1,194 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/spinlock_types.h>
-
-static inline int __raw_spin_is_locked(raw_spinlock_t *x)
-{
-	volatile unsigned int *a = __ldcw_align(x);
-	return *a == 0;
-}
-
-#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
-#define __raw_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (__raw_spin_is_locked(x))
-
-static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
-					 unsigned long flags)
-{
-	volatile unsigned int *a;
-
-	mb();
-	a = __ldcw_align(x);
-	while (__ldcw(a) == 0)
-		while (*a == 0)
-			if (flags & PSW_SM_I) {
-				local_irq_enable();
-				cpu_relax();
-				local_irq_disable();
-			} else
-				cpu_relax();
-	mb();
-}
-
-static inline void __raw_spin_unlock(raw_spinlock_t *x)
-{
-	volatile unsigned int *a;
-	mb();
-	a = __ldcw_align(x);
-	*a = 1;
-	mb();
-}
-
-static inline int __raw_spin_trylock(raw_spinlock_t *x)
-{
-	volatile unsigned int *a;
-	int ret;
-
-	mb();
-	a = __ldcw_align(x);
-        ret = __ldcw(a) != 0;
-	mb();
-
-	return ret;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers but only one writer.
- * Linux rwlocks are unfair to writers; they can be starved for an indefinite
- * time by readers.  With care, they can also be taken in interrupt context.
- *
- * In the PA-RISC implementation, we have a spinlock and a counter.
- * Readers use the lock to serialise their access to the counter (which
- * records how many readers currently hold the lock).
- * Writers hold the spinlock, preventing any readers or other writers from
- * grabbing the rwlock.
- */
-
-/* Note that we have to ensure interrupts are disabled in case we're
- * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_lock(raw_rwlock_t *rw)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
-	rw->counter++;
-	__raw_spin_unlock(&rw->lock);
-	local_irq_restore(flags);
-}
-
-/* Note that we have to ensure interrupts are disabled in case we're
- * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
-	rw->counter--;
-	__raw_spin_unlock(&rw->lock);
-	local_irq_restore(flags);
-}
-
-/* Note that we have to ensure interrupts are disabled in case we're
- * interrupted by some other code that wants to grab the same read lock */
-static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
-{
-	unsigned long flags;
- retry:
-	local_irq_save(flags);
-	if (__raw_spin_trylock(&rw->lock)) {
-		rw->counter++;
-		__raw_spin_unlock(&rw->lock);
-		local_irq_restore(flags);
-		return 1;
-	}
-
-	local_irq_restore(flags);
-	/* If write-locked, we fail to acquire the lock */
-	if (rw->counter < 0)
-		return 0;
-
-	/* Wait until we have a realistic chance at the lock */
-	while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0)
-		cpu_relax();
-
-	goto retry;
-}
-
-/* Note that we have to ensure interrupts are disabled in case we're
- * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ void __raw_write_lock(raw_rwlock_t *rw)
-{
-	unsigned long flags;
-retry:
-	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
-
-	if (rw->counter != 0) {
-		__raw_spin_unlock(&rw->lock);
-		local_irq_restore(flags);
-
-		while (rw->counter != 0)
-			cpu_relax();
-
-		goto retry;
-	}
-
-	rw->counter = -1; /* mark as write-locked */
-	mb();
-	local_irq_restore(flags);
-}
-
-static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
-{
-	rw->counter = 0;
-	__raw_spin_unlock(&rw->lock);
-}
-
-/* Note that we have to ensure interrupts are disabled in case we're
- * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
-{
-	unsigned long flags;
-	int result = 0;
-
-	local_irq_save(flags);
-	if (__raw_spin_trylock(&rw->lock)) {
-		if (rw->counter == 0) {
-			rw->counter = -1;
-			result = 1;
-		} else {
-			/* Read-locked.  Oh well. */
-			__raw_spin_unlock(&rw->lock);
-		}
-	}
-	local_irq_restore(flags);
-
-	return result;
-}
-
-/*
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw)
-{
-	return rw->counter >= 0;
-}
-
-/*
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw)
-{
-	return !rw->counter;
-}
-
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-parisc/spinlock_types.h b/include/asm-parisc/spinlock_types.h
deleted file mode 100644
index 3f72f47cf4b2..000000000000
--- a/include/asm-parisc/spinlock_types.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __ASM_SPINLOCK_TYPES_H
-#define __ASM_SPINLOCK_TYPES_H
-
-typedef struct {
-#ifdef CONFIG_PA20
-	volatile unsigned int slock;
-# define __RAW_SPIN_LOCK_UNLOCKED { 1 }
-#else
-	volatile unsigned int lock[4];
-# define __RAW_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
-#endif
-} raw_spinlock_t;
-
-typedef struct {
-	raw_spinlock_t lock;
-	volatile int counter;
-} raw_rwlock_t;
-
-#define __RAW_RW_LOCK_UNLOCKED		{ __RAW_SPIN_LOCK_UNLOCKED, 0 }
-
-#endif
diff --git a/include/asm-parisc/stat.h b/include/asm-parisc/stat.h
deleted file mode 100644
index 9d5fbbc5c31f..000000000000
--- a/include/asm-parisc/stat.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef _PARISC_STAT_H
-#define _PARISC_STAT_H
-
-#include <linux/types.h>
-
-struct stat {
-	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
-	ino_t		st_ino;		/* 32 bits */
-	mode_t		st_mode;	/* 16 bits */
-	nlink_t		st_nlink;	/* 16 bits */
-	unsigned short	st_reserved1;	/* old st_uid */
-	unsigned short	st_reserved2;	/* old st_gid */
-	unsigned int	st_rdev;
-	off_t		st_size;
-	time_t		st_atime;
-	unsigned int	st_atime_nsec;
-	time_t		st_mtime;
-	unsigned int	st_mtime_nsec;
-	time_t		st_ctime;
-	unsigned int	st_ctime_nsec;
-	int		st_blksize;
-	int		st_blocks;
-	unsigned int	__unused1;	/* ACL stuff */
-	unsigned int	__unused2;	/* network */
-	ino_t		__unused3;	/* network */
-	unsigned int	__unused4;	/* cnodes */
-	unsigned short	__unused5;	/* netsite */
-	short		st_fstype;
-	unsigned int	st_realdev;
-	unsigned short	st_basemode;
-	unsigned short	st_spareshort;
-	uid_t		st_uid;
-	gid_t		st_gid;
-	unsigned int	st_spare4[3];
-};
-
-#define STAT_HAVE_NSEC
-
-typedef __kernel_off64_t	off64_t;
-
-struct hpux_stat64 {
-	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
-	ino_t           st_ino;         /* 32 bits */
-	mode_t		st_mode;	/* 16 bits */
-	nlink_t		st_nlink;	/* 16 bits */
-	unsigned short	st_reserved1;	/* old st_uid */
-	unsigned short	st_reserved2;	/* old st_gid */
-	unsigned int	st_rdev;
-	off64_t		st_size;
-	time_t		st_atime;
-	unsigned int	st_spare1;
-	time_t		st_mtime;
-	unsigned int	st_spare2;
-	time_t		st_ctime;
-	unsigned int	st_spare3;
-	int		st_blksize;
-	__u64		st_blocks;
-	unsigned int	__unused1;	/* ACL stuff */
-	unsigned int	__unused2;	/* network */
-	ino_t           __unused3;      /* network */
-	unsigned int	__unused4;	/* cnodes */
-	unsigned short	__unused5;	/* netsite */
-	short		st_fstype;
-	unsigned int	st_realdev;
-	unsigned short	st_basemode;
-	unsigned short	st_spareshort;
-	uid_t		st_uid;
-	gid_t		st_gid;
-	unsigned int	st_spare4[3];
-};
-
-/* This is the struct that 32-bit userspace applications are expecting.
- * How 64-bit apps are going to be compiled, I have no idea.  But at least
- * this way, we don't have a wrapper in the kernel.
- */
-struct stat64 {
-	unsigned long long	st_dev;
-	unsigned int		__pad1;
-
-	unsigned int		__st_ino;	/* Not actually filled in */
-	unsigned int		st_mode;
-	unsigned int		st_nlink;
-	unsigned int		st_uid;
-	unsigned int		st_gid;
-	unsigned long long	st_rdev;
-	unsigned int		__pad2;
-	signed long long	st_size;
-	signed int		st_blksize;
-
-	signed long long	st_blocks;
-	signed int		st_atime;
-	unsigned int		st_atime_nsec;
-	signed int		st_mtime;
-	unsigned int		st_mtime_nsec;
-	signed int		st_ctime;
-	unsigned int		st_ctime_nsec;
-	unsigned long long	st_ino;
-};
-
-#endif
diff --git a/include/asm-parisc/statfs.h b/include/asm-parisc/statfs.h
deleted file mode 100644
index 1d2b8130b23d..000000000000
--- a/include/asm-parisc/statfs.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _PARISC_STATFS_H
-#define _PARISC_STATFS_H
-
-#ifndef __KERNEL_STRICT_NAMES
-
-#include <linux/types.h>
-
-typedef __kernel_fsid_t	fsid_t;
-
-#endif
-
-/*
- * It appears that PARISC could be 64 _or_ 32 bit.
- * 64-bit fields must be explicitly 64-bit in statfs64.
- */
-struct statfs {
-	long f_type;
-	long f_bsize;
-	long f_blocks;
-	long f_bfree;
-	long f_bavail;
-	long f_files;
-	long f_ffree;
-	__kernel_fsid_t f_fsid;
-	long f_namelen;
-	long f_frsize;
-	long f_spare[5];
-};
-
-struct statfs64 {
-	long f_type;
-	long f_bsize;
-	__u64 f_blocks;
-	__u64 f_bfree;
-	__u64 f_bavail;
-	__u64 f_files;
-	__u64 f_ffree;
-	__kernel_fsid_t f_fsid;
-	long f_namelen;
-	long f_frsize;
-	long f_spare[5];
-};
-
-struct compat_statfs64 {
-	__u32 f_type;
-	__u32 f_bsize;
-	__u64 f_blocks;
-	__u64 f_bfree;
-	__u64 f_bavail;
-	__u64 f_files;
-	__u64 f_ffree;
-	__kernel_fsid_t f_fsid;
-	__u32 f_namelen;
-	__u32 f_frsize;
-	__u32 f_spare[5];
-};
-
-#endif
diff --git a/include/asm-parisc/string.h b/include/asm-parisc/string.h
deleted file mode 100644
index eda01be65e35..000000000000
--- a/include/asm-parisc/string.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _PA_STRING_H_
-#define _PA_STRING_H_
-
-#define __HAVE_ARCH_MEMSET
-extern void * memset(void *, int, size_t);
-
-#define __HAVE_ARCH_MEMCPY
-void * memcpy(void * dest,const void *src,size_t count);
-
-#endif
diff --git a/include/asm-parisc/superio.h b/include/asm-parisc/superio.h
deleted file mode 100644
index 6598acb4d46d..000000000000
--- a/include/asm-parisc/superio.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef _PARISC_SUPERIO_H
-#define _PARISC_SUPERIO_H
-
-#define IC_PIC1    0x20		/* PCI I/O address of master 8259 */
-#define IC_PIC2    0xA0		/* PCI I/O address of slave */
-
-/* Config Space Offsets to configuration and base address registers */
-#define SIO_CR     0x5A		/* Configuration Register */
-#define SIO_ACPIBAR 0x88	/* ACPI BAR */
-#define SIO_FDCBAR 0x90		/* Floppy Disk Controller BAR */
-#define SIO_SP1BAR 0x94		/* Serial 1 BAR */
-#define SIO_SP2BAR 0x98		/* Serial 2 BAR */
-#define SIO_PPBAR  0x9C		/* Parallel BAR */
-
-#define TRIGGER_1  0x67		/* Edge/level trigger register 1 */
-#define TRIGGER_2  0x68		/* Edge/level trigger register 2 */
-
-/* Interrupt Routing Control registers */
-#define CFG_IR_SER    0x69	/* Serial 1 [0:3] and Serial 2 [4:7] */
-#define CFG_IR_PFD    0x6a	/* Parallel [0:3] and Floppy [4:7] */
-#define CFG_IR_IDE    0x6b	/* IDE1     [0:3] and IDE2 [4:7] */
-#define CFG_IR_INTAB  0x6c	/* PCI INTA [0:3] and INT B [4:7] */
-#define CFG_IR_INTCD  0x6d	/* PCI INTC [0:3] and INT D [4:7] */
-#define CFG_IR_PS2    0x6e	/* PS/2 KBINT [0:3] and Mouse [4:7] */
-#define CFG_IR_FXBUS  0x6f	/* FXIRQ[0] [0:3] and FXIRQ[1] [4:7] */
-#define CFG_IR_USB    0x70	/* FXIRQ[2] [0:3] and USB [4:7] */
-#define CFG_IR_ACPI   0x71	/* ACPI SCI [0:3] and reserved [4:7] */
-
-#define CFG_IR_LOW     CFG_IR_SER	/* Lowest interrupt routing reg */
-#define CFG_IR_HIGH    CFG_IR_ACPI	/* Highest interrupt routing reg */
-
-/* 8259 operational control words */
-#define OCW2_EOI   0x20		/* Non-specific EOI */
-#define OCW2_SEOI  0x60		/* Specific EOI */
-#define OCW3_IIR   0x0A		/* Read request register */
-#define OCW3_ISR   0x0B		/* Read service register */
-#define OCW3_POLL  0x0C		/* Poll the PIC for an interrupt vector */
-
-/* Interrupt lines. Only PIC1 is used */
-#define USB_IRQ    1		/* USB */
-#define SP1_IRQ    3		/* Serial port 1 */
-#define SP2_IRQ    4		/* Serial port 2 */
-#define PAR_IRQ    5		/* Parallel port */
-#define FDC_IRQ    6		/* Floppy controller */
-#define IDE_IRQ    7		/* IDE (pri+sec) */
-
-/* ACPI registers */
-#define USB_REG_CR	0x1f	/* USB Regulator Control Register */
-
-#define SUPERIO_NIRQS   8
-
-struct superio_device {
-	u32 fdc_base;
-	u32 sp1_base;
-	u32 sp2_base;
-	u32 pp_base;
-	u32 acpi_base;
-	int suckyio_irq_enabled;
-	struct pci_dev *lio_pdev;       /* pci device for legacy IO (fn 1) */
-	struct pci_dev *usb_pdev;       /* pci device for USB (fn 2) */
-};
-
-/*
- * Does NS make a 87415 based plug in PCI card? If so, because of this
- * macro we currently don't support it being plugged into a machine
- * that contains a SuperIO chip AND has CONFIG_SUPERIO enabled.
- *
- * This could be fixed by checking to see if function 1 exists, and
- * if it is SuperIO Legacy IO; but really now, is this combination
- * going to EVER happen?
- */
-
-#define SUPERIO_IDE_FN 0 /* Function number of IDE controller */
-#define SUPERIO_LIO_FN 1 /* Function number of Legacy IO controller */
-#define SUPERIO_USB_FN 2 /* Function number of USB controller */
-
-#define is_superio_device(x) \
-	(((x)->vendor == PCI_VENDOR_ID_NS) && \
-	(  ((x)->device == PCI_DEVICE_ID_NS_87415) \
-	|| ((x)->device == PCI_DEVICE_ID_NS_87560_LIO) \
-	|| ((x)->device == PCI_DEVICE_ID_NS_87560_USB) ) )
-
-extern int superio_fixup_irq(struct pci_dev *pcidev); /* called by iosapic */
-
-#endif /* _PARISC_SUPERIO_H */
diff --git a/include/asm-parisc/system.h b/include/asm-parisc/system.h
deleted file mode 100644
index ee80c920b464..000000000000
--- a/include/asm-parisc/system.h
+++ /dev/null
@@ -1,182 +0,0 @@
-#ifndef __PARISC_SYSTEM_H
-#define __PARISC_SYSTEM_H
-
-#include <asm/psw.h>
-
-/* The program status word as bitfields.  */
-struct pa_psw {
-	unsigned int y:1;
-	unsigned int z:1;
-	unsigned int rv:2;
-	unsigned int w:1;
-	unsigned int e:1;
-	unsigned int s:1;
-	unsigned int t:1;
-
-	unsigned int h:1;
-	unsigned int l:1;
-	unsigned int n:1;
-	unsigned int x:1;
-	unsigned int b:1;
-	unsigned int c:1;
-	unsigned int v:1;
-	unsigned int m:1;
-
-	unsigned int cb:8;
-
-	unsigned int o:1;
-	unsigned int g:1;
-	unsigned int f:1;
-	unsigned int r:1;
-	unsigned int q:1;
-	unsigned int p:1;
-	unsigned int d:1;
-	unsigned int i:1;
-};
-
-#ifdef CONFIG_64BIT
-#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW + 4))
-#else
-#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW))
-#endif
-
-struct task_struct;
-
-extern struct task_struct *_switch_to(struct task_struct *, struct task_struct *);
-
-#define switch_to(prev, next, last) do {			\
-	(last) = _switch_to(prev, next);			\
-} while(0)
-
-/* interrupt control */
-#define local_save_flags(x)	__asm__ __volatile__("ssm 0, %0" : "=r" (x) : : "memory")
-#define local_irq_disable()	__asm__ __volatile__("rsm %0,%%r0\n" : : "i" (PSW_I) : "memory" )
-#define local_irq_enable()	__asm__ __volatile__("ssm %0,%%r0\n" : : "i" (PSW_I) : "memory" )
-
-#define local_irq_save(x) \
-	__asm__ __volatile__("rsm %1,%0" : "=r" (x) :"i" (PSW_I) : "memory" )
-#define local_irq_restore(x) \
-	__asm__ __volatile__("mtsm %0" : : "r" (x) : "memory" )
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	(flags & PSW_I) == 0;		\
-})
-
-#define mfctl(reg)	({		\
-	unsigned long cr;		\
-	__asm__ __volatile__(		\
-		"mfctl " #reg ",%0" :	\
-		 "=r" (cr)		\
-	);				\
-	cr;				\
-})
-
-#define mtctl(gr, cr) \
-	__asm__ __volatile__("mtctl %0,%1" \
-		: /* no outputs */ \
-		: "r" (gr), "i" (cr) : "memory")
-
-/* these are here to de-mystefy the calling code, and to provide hooks */
-/* which I needed for debugging EIEM problems -PB */
-#define get_eiem() mfctl(15)
-static inline void set_eiem(unsigned long val)
-{
-	mtctl(val, 15);
-}
-
-#define mfsp(reg)	({		\
-	unsigned long cr;		\
-	__asm__ __volatile__(		\
-		"mfsp " #reg ",%0" :	\
-		 "=r" (cr)		\
-	);				\
-	cr;				\
-})
-
-#define mtsp(gr, cr) \
-	__asm__ __volatile__("mtsp %0,%1" \
-		: /* no outputs */ \
-		: "r" (gr), "i" (cr) : "memory")
-
-
-/*
-** This is simply the barrier() macro from linux/kernel.h but when serial.c
-** uses tqueue.h uses smp_mb() defined using barrier(), linux/kernel.h
-** hasn't yet been included yet so it fails, thus repeating the macro here.
-**
-** PA-RISC architecture allows for weakly ordered memory accesses although
-** none of the processors use it. There is a strong ordered bit that is
-** set in the O-bit of the page directory entry. Operating systems that
-** can not tolerate out of order accesses should set this bit when mapping
-** pages. The O-bit of the PSW should also be set to 1 (I don't believe any
-** of the processor implemented the PSW O-bit). The PCX-W ERS states that
-** the TLB O-bit is not implemented so the page directory does not need to
-** have the O-bit set when mapping pages (section 3.1). This section also
-** states that the PSW Y, Z, G, and O bits are not implemented.
-** So it looks like nothing needs to be done for parisc-linux (yet).
-** (thanks to chada for the above comment -ggg)
-**
-** The __asm__ op below simple prevents gcc/ld from reordering
-** instructions across the mb() "call".
-*/
-#define mb()		__asm__ __volatile__("":::"memory")	/* barrier() */
-#define rmb()		mb()
-#define wmb()		mb()
-#define smp_mb()	mb()
-#define smp_rmb()	mb()
-#define smp_wmb()	mb()
-#define smp_read_barrier_depends()	do { } while(0)
-#define read_barrier_depends()		do { } while(0)
-
-#define set_mb(var, value)		do { var = value; mb(); } while (0)
-
-#ifndef CONFIG_PA20
-/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
-   and GCC only guarantees 8-byte alignment for stack locals, we can't
-   be assured of 16-byte alignment for atomic lock data even if we
-   specify "__attribute ((aligned(16)))" in the type declaration.  So,
-   we use a struct containing an array of four ints for the atomic lock
-   type and dynamically select the 16-byte aligned int from the array
-   for the semaphore.  */
-
-#define __PA_LDCW_ALIGNMENT	16
-#define __ldcw_align(a) ({					\
-	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
-	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
-		& ~(__PA_LDCW_ALIGNMENT - 1);			\
-	(volatile unsigned int *) __ret;			\
-})
-#define __LDCW	"ldcw"
-
-#else /*CONFIG_PA20*/
-/* From: "Jim Hull" <jim.hull of hp.com>
-   I've attached a summary of the change, but basically, for PA 2.0, as
-   long as the ",CO" (coherent operation) completer is specified, then the
-   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
-   they only require "natural" alignment (4-byte for ldcw, 8-byte for
-   ldcd). */
-
-#define __PA_LDCW_ALIGNMENT	4
-#define __ldcw_align(a) ((volatile unsigned int *)a)
-#define __LDCW	"ldcw,co"
-
-#endif /*!CONFIG_PA20*/
-
-/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.  */
-#define __ldcw(a) ({						\
-	unsigned __ret;						\
-	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
-		: "=r" (__ret) : "r" (a));			\
-	__ret;							\
-})
-
-#ifdef CONFIG_SMP
-# define __lock_aligned __attribute__((__section__(".data.lock_aligned")))
-#endif
-
-#define arch_align_stack(x) (x)
-
-#endif
diff --git a/include/asm-parisc/termbits.h b/include/asm-parisc/termbits.h
deleted file mode 100644
index d8bbc73b16b7..000000000000
--- a/include/asm-parisc/termbits.h
+++ /dev/null
@@ -1,200 +0,0 @@
-#ifndef __ARCH_PARISC_TERMBITS_H__
-#define __ARCH_PARISC_TERMBITS_H__
-
-#include <linux/posix_types.h>
-
-typedef unsigned char	cc_t;
-typedef unsigned int	speed_t;
-typedef unsigned int	tcflag_t;
-
-#define NCCS 19
-struct termios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-};
-
-struct termios2 {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-struct ktermios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-
-/* c_iflag bits */
-#define IGNBRK	0000001
-#define BRKINT	0000002
-#define IGNPAR	0000004
-#define PARMRK	0000010
-#define INPCK	0000020
-#define ISTRIP	0000040
-#define INLCR	0000100
-#define IGNCR	0000200
-#define ICRNL	0000400
-#define IUCLC	0001000
-#define IXON	0002000
-#define IXANY	0004000
-#define IXOFF	0010000
-#define IMAXBEL	0040000
-#define IUTF8	0100000
-
-/* c_oflag bits */
-#define OPOST	0000001
-#define OLCUC	0000002
-#define ONLCR	0000004
-#define OCRNL	0000010
-#define ONOCR	0000020
-#define ONLRET	0000040
-#define OFILL	0000100
-#define OFDEL	0000200
-#define NLDLY	0000400
-#define   NL0	0000000
-#define   NL1	0000400
-#define CRDLY	0003000
-#define   CR0	0000000
-#define   CR1	0001000
-#define   CR2	0002000
-#define   CR3	0003000
-#define TABDLY	0014000
-#define   TAB0	0000000
-#define   TAB1	0004000
-#define   TAB2	0010000
-#define   TAB3	0014000
-#define   XTABS	0014000
-#define BSDLY	0020000
-#define   BS0	0000000
-#define   BS1	0020000
-#define VTDLY	0040000
-#define   VT0	0000000
-#define   VT1	0040000
-#define FFDLY	0100000
-#define   FF0	0000000
-#define   FF1	0100000
-
-/* c_cflag bit meaning */
-#define CBAUD   0010017
-#define  B0     0000000         /* hang up */
-#define  B50    0000001
-#define  B75    0000002
-#define  B110   0000003
-#define  B134   0000004
-#define  B150   0000005
-#define  B200   0000006
-#define  B300   0000007
-#define  B600   0000010
-#define  B1200  0000011
-#define  B1800  0000012
-#define  B2400  0000013
-#define  B4800  0000014
-#define  B9600  0000015
-#define  B19200 0000016
-#define  B38400 0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE   0000060
-#define   CS5   0000000
-#define   CS6   0000020
-#define   CS7   0000040
-#define   CS8   0000060
-#define CSTOPB  0000100
-#define CREAD   0000200
-#define PARENB  0000400
-#define PARODD  0001000
-#define HUPCL   0002000
-#define CLOCAL  0004000
-#define CBAUDEX 0010000
-#define    BOTHER 0010000
-#define    B57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B500000 0010005
-#define   B576000 0010006
-#define   B921600 0010007
-#define  B1000000 0010010
-#define  B1152000 0010011
-#define  B1500000 0010012
-#define  B2000000 0010013
-#define  B2500000 0010014
-#define  B3000000 0010015
-#define  B3500000 0010016
-#define  B4000000 0010017
-#define CIBAUD    002003600000		/* input baud rate */
-#define CMSPAR    010000000000          /* mark or space (stick) parity */
-#define CRTSCTS   020000000000          /* flow control */
-
-#define IBSHIFT	16		/* Shift from CBAUD to CIBAUD */
-
-
-/* c_lflag bits */
-#define ISIG    0000001
-#define ICANON  0000002
-#define XCASE   0000004
-#define ECHO    0000010
-#define ECHOE   0000020
-#define ECHOK   0000040
-#define ECHONL  0000100
-#define NOFLSH  0000200
-#define TOSTOP  0000400
-#define ECHOCTL 0001000
-#define ECHOPRT 0002000
-#define ECHOKE  0004000
-#define FLUSHO  0010000
-#define PENDIN  0040000
-#define IEXTEN  0100000
-
-/* tcflow() and TCXONC use these */
-#define	TCOOFF		0
-#define	TCOON		1
-#define	TCIOFF		2
-#define	TCION		3
-
-/* tcflush() and TCFLSH use these */
-#define	TCIFLUSH	0
-#define	TCOFLUSH	1
-#define	TCIOFLUSH	2
-
-/* tcsetattr uses these */
-#define	TCSANOW		0
-#define	TCSADRAIN	1
-#define	TCSAFLUSH	2
-
-#endif
diff --git a/include/asm-parisc/termios.h b/include/asm-parisc/termios.h
deleted file mode 100644
index a2a57a4548af..000000000000
--- a/include/asm-parisc/termios.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _PARISC_TERMIOS_H
-#define _PARISC_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
-
-/*	intr=^C		quit=^\		erase=del	kill=^U
-	eof=^D		vtime=\0	vmin=\1		sxtc=\0
-	start=^Q	stop=^S		susp=^Z		eol=\0
-	reprint=^R	discard=^U	werase=^W	lnext=^V
-	eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
-	unsigned short __tmp; \
-	get_user(__tmp,&(termio)->x); \
-	*(unsigned short *) &(termios)->x = __tmp; \
-}
-
-#define user_termio_to_kernel_termios(termios, termio) \
-({ \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
-	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
-})
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-#define kernel_termios_to_user_termio(termio, termios) \
-({ \
-	put_user((termios)->c_iflag, &(termio)->c_iflag); \
-	put_user((termios)->c_oflag, &(termio)->c_oflag); \
-	put_user((termios)->c_cflag, &(termio)->c_cflag); \
-	put_user((termios)->c_lflag, &(termio)->c_lflag); \
-	put_user((termios)->c_line,  &(termio)->c_line); \
-	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
-})
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif	/* __KERNEL__ */
-
-#endif	/* _PARISC_TERMIOS_H */
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
deleted file mode 100644
index 9f812741c355..000000000000
--- a/include/asm-parisc/thread_info.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef _ASM_PARISC_THREAD_INFO_H
-#define _ASM_PARISC_THREAD_INFO_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-
-struct thread_info {
-	struct task_struct *task;	/* main task structure */
-	struct exec_domain *exec_domain;/* execution domain */
-	unsigned long flags;		/* thread_info flags (see TIF_*) */
-	mm_segment_t addr_limit;	/* user-level address space limit */
-	__u32 cpu;			/* current CPU */
-	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
-	struct restart_block restart_block;
-};
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-	.addr_limit	= KERNEL_DS,		\
-	.preempt_count	= 1,			\
-  	.restart_block	= {			\
-		.fn = do_no_restart_syscall	\
-	}					\
-}
-
-#define init_thread_info        (init_thread_union.thread_info)
-#define init_stack              (init_thread_union.stack)
-
-/* thread information allocation */
-
-#define THREAD_SIZE_ORDER            2
-/* Be sure to hunt all references to this down when you change the size of
- * the kernel stack */
-#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
-
-/* how to get the thread information struct from C */
-#define current_thread_info()	((struct thread_info *)mfctl(30))
-
-#endif /* !__ASSEMBLY */
-
-#define PREEMPT_ACTIVE_BIT	28
-#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
-
-/*
- * thread information flags
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_32BIT               4       /* 32 bit binary */
-#define TIF_MEMDIE		5
-#define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
-
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_32BIT		(1 << TIF_32BIT)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-
-#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
-                                 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_PARISC_THREAD_INFO_H */
diff --git a/include/asm-parisc/timex.h b/include/asm-parisc/timex.h
deleted file mode 100644
index 3b68d77273d9..000000000000
--- a/include/asm-parisc/timex.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * linux/include/asm-parisc/timex.h
- *
- * PARISC architecture timex specifications
- */
-#ifndef _ASMPARISC_TIMEX_H
-#define _ASMPARISC_TIMEX_H
-
-#include <asm/system.h>
-
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
-
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
-	return mfctl(16);
-}
-
-#endif
diff --git a/include/asm-parisc/tlb.h b/include/asm-parisc/tlb.h
deleted file mode 100644
index 383b1db310ee..000000000000
--- a/include/asm-parisc/tlb.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _PARISC_TLB_H
-#define _PARISC_TLB_H
-
-#define tlb_flush(tlb)			\
-do {	if ((tlb)->fullmm)		\
-		flush_tlb_mm((tlb)->mm);\
-} while (0)
-
-#define tlb_start_vma(tlb, vma) \
-do {	if (!(tlb)->fullmm)	\
-		flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma)	\
-do {	if (!(tlb)->fullmm)	\
-		flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
-	do { } while (0)
-
-#include <asm-generic/tlb.h>
-
-#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
-
-#endif
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
deleted file mode 100644
index b72ec66db699..000000000000
--- a/include/asm-parisc/tlbflush.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _PARISC_TLBFLUSH_H
-#define _PARISC_TLBFLUSH_H
-
-/* TLB flushing routines.... */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <asm/mmu_context.h>
-
-
-/* This is for the serialisation of PxTLB broadcasts.  At least on the
- * N class systems, only one PxTLB inter processor broadcast can be
- * active at any one time on the Merced bus.  This tlb purge
- * synchronisation is fairly lightweight and harmless so we activate
- * it on all SMP systems not just the N class.  We also need to have
- * preemption disabled on uniprocessor machines, and spin_lock does that
- * nicely.
- */
-extern spinlock_t pa_tlb_lock;
-
-#define purge_tlb_start(x) spin_lock(&pa_tlb_lock)
-#define purge_tlb_end(x) spin_unlock(&pa_tlb_lock)
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_all_local(void *);
-
-/*
- * flush_tlb_mm()
- *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
- */
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG_ON(mm == &init_mm); /* Should never happen */
-
-#ifdef CONFIG_SMP
-	flush_tlb_all();
-#else
-	if (mm) {
-		if (mm->context != 0)
-			free_sid(mm->context);
-		mm->context = alloc_sid();
-		if (mm == current->active_mm)
-			load_context(mm->context);
-	}
-#endif
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-	unsigned long addr)
-{
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
-	mtsp(vma->vm_mm->context,1);
-	purge_tlb_start();
-	pdtlb(addr);
-	pitlb(addr);
-	purge_tlb_end();
-}
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
-#endif
diff --git a/include/asm-parisc/topology.h b/include/asm-parisc/topology.h
deleted file mode 100644
index d8133eb0b1e7..000000000000
--- a/include/asm-parisc/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_PARISC_TOPOLOGY_H
-#define _ASM_PARISC_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_PARISC_TOPOLOGY_H */
diff --git a/include/asm-parisc/traps.h b/include/asm-parisc/traps.h
deleted file mode 100644
index 1945f995f2df..000000000000
--- a/include/asm-parisc/traps.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_TRAPS_H
-#define __ASM_TRAPS_H
-
-#ifdef __KERNEL__
-struct pt_regs;
-
-/* traps.c */
-void parisc_terminate(char *msg, struct pt_regs *regs,
-		int code, unsigned long offset);
-
-/* mm/fault.c */
-void do_page_fault(struct pt_regs *regs, unsigned long code,
-		unsigned long address);
-#endif
-
-#endif
diff --git a/include/asm-parisc/types.h b/include/asm-parisc/types.h
deleted file mode 100644
index 7f5a39bfb4ce..000000000000
--- a/include/asm-parisc/types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _PARISC_TYPES_H
-#define _PARISC_TYPES_H
-
-#include <asm-generic/int-ll64.h>
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-#ifdef __KERNEL__
-
-#ifdef CONFIG_64BIT
-#define BITS_PER_LONG 64
-#define SHIFT_PER_LONG 6
-#else
-#define BITS_PER_LONG 32
-#define SHIFT_PER_LONG 5
-#endif
-
-#ifndef __ASSEMBLY__
-
-/* Dma addresses are 32-bits wide.  */
-
-typedef u32 dma_addr_t;
-typedef u64 dma64_addr_t;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/include/asm-parisc/uaccess.h b/include/asm-parisc/uaccess.h
deleted file mode 100644
index 4878b9501f24..000000000000
--- a/include/asm-parisc/uaccess.h
+++ /dev/null
@@ -1,244 +0,0 @@
-#ifndef __PARISC_UACCESS_H
-#define __PARISC_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/cache.h>
-#include <asm-generic/uaccess.h>
-
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-#define KERNEL_DS	((mm_segment_t){0})
-#define USER_DS 	((mm_segment_t){1})
-
-#define segment_eq(a,b)	((a).seg == (b).seg)
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
-
-/*
- * Note that since kernel addresses are in a separate address space on
- * parisc, we don't need to do anything for access_ok().
- * We just let the page fault handler do the right thing. This also means
- * that put_user is the same as __put_user, etc.
- */
-
-extern int __get_kernel_bad(void);
-extern int __get_user_bad(void);
-extern int __put_kernel_bad(void);
-extern int __put_user_bad(void);
-
-static inline long access_ok(int type, const void __user * addr,
-		unsigned long size)
-{
-	return 1;
-}
-
-#define put_user __put_user
-#define get_user __get_user
-
-#if !defined(CONFIG_64BIT)
-#define LDD_KERNEL(ptr)		__get_kernel_bad();
-#define LDD_USER(ptr)		__get_user_bad();
-#define STD_KERNEL(x, ptr)	__put_kernel_asm64(x,ptr)
-#define STD_USER(x, ptr)	__put_user_asm64(x,ptr)
-#define ASM_WORD_INSN		".word\t"
-#else
-#define LDD_KERNEL(ptr)		__get_kernel_asm("ldd",ptr)
-#define LDD_USER(ptr)		__get_user_asm("ldd",ptr)
-#define STD_KERNEL(x, ptr)	__put_kernel_asm("std",x,ptr)
-#define STD_USER(x, ptr)	__put_user_asm("std",x,ptr)
-#define ASM_WORD_INSN		".dword\t"
-#endif
-
-/*
- * The exception table contains two values: the first is an address
- * for an instruction that is allowed to fault, and the second is
- * the address to the fixup routine. 
- */
-
-struct exception_table_entry {
-	unsigned long insn;  /* address of insn that is allowed to fault.   */
-	long fixup;          /* fixup routine */
-};
-
-#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
-	".section __ex_table,\"aw\"\n"			   \
-	ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \
-	".previous\n"
-
-/*
- * The page fault handler stores, in a per-cpu area, the following information
- * if a fixup routine is available.
- */
-struct exception_data {
-	unsigned long fault_ip;
-	unsigned long fault_space;
-	unsigned long fault_addr;
-};
-
-#define __get_user(x,ptr)                               \
-({                                                      \
-	register long __gu_err __asm__ ("r8") = 0;      \
-	register long __gu_val __asm__ ("r9") = 0;      \
-							\
-	if (segment_eq(get_fs(),KERNEL_DS)) {           \
-	    switch (sizeof(*(ptr))) {                   \
-	    case 1: __get_kernel_asm("ldb",ptr); break; \
-	    case 2: __get_kernel_asm("ldh",ptr); break; \
-	    case 4: __get_kernel_asm("ldw",ptr); break; \
-	    case 8: LDD_KERNEL(ptr); break;		\
-	    default: __get_kernel_bad(); break;         \
-	    }                                           \
-	}                                               \
-	else {                                          \
-	    switch (sizeof(*(ptr))) {                   \
-	    case 1: __get_user_asm("ldb",ptr); break;   \
-	    case 2: __get_user_asm("ldh",ptr); break;   \
-	    case 4: __get_user_asm("ldw",ptr); break;   \
-	    case 8: LDD_USER(ptr);  break;		\
-	    default: __get_user_bad(); break;           \
-	    }                                           \
-	}                                               \
-							\
-	(x) = (__typeof__(*(ptr))) __gu_val;            \
-	__gu_err;                                       \
-})
-
-#define __get_kernel_asm(ldx,ptr)                       \
-	__asm__("\n1:\t" ldx "\t0(%2),%0\n\t"		\
-		ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
-		: "=r"(__gu_val), "=r"(__gu_err)        \
-		: "r"(ptr), "1"(__gu_err)		\
-		: "r1");
-
-#define __get_user_asm(ldx,ptr)                         \
-	__asm__("\n1:\t" ldx "\t0(%%sr3,%2),%0\n\t"	\
-		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_get_user_skip_1)\
-		: "=r"(__gu_val), "=r"(__gu_err)        \
-		: "r"(ptr), "1"(__gu_err)		\
-		: "r1");
-
-#define __put_user(x,ptr)                                       \
-({								\
-	register long __pu_err __asm__ ("r8") = 0;      	\
-        __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);	\
-								\
-	if (segment_eq(get_fs(),KERNEL_DS)) {                   \
-	    switch (sizeof(*(ptr))) {                           \
-	    case 1: __put_kernel_asm("stb",__x,ptr); break;     \
-	    case 2: __put_kernel_asm("sth",__x,ptr); break;     \
-	    case 4: __put_kernel_asm("stw",__x,ptr); break;     \
-	    case 8: STD_KERNEL(__x,ptr); break;			\
-	    default: __put_kernel_bad(); break;			\
-	    }                                                   \
-	}                                                       \
-	else {                                                  \
-	    switch (sizeof(*(ptr))) {                           \
-	    case 1: __put_user_asm("stb",__x,ptr); break;       \
-	    case 2: __put_user_asm("sth",__x,ptr); break;       \
-	    case 4: __put_user_asm("stw",__x,ptr); break;       \
-	    case 8: STD_USER(__x,ptr); break;			\
-	    default: __put_user_bad(); break;			\
-	    }                                                   \
-	}                                                       \
-								\
-	__pu_err;						\
-})
-
-/*
- * The "__put_user/kernel_asm()" macros tell gcc they read from memory
- * instead of writing. This is because they do not write to any memory
- * gcc knows about, so there are no aliasing issues. These macros must
- * also be aware that "fixup_put_user_skip_[12]" are executed in the
- * context of the fault, and any registers used there must be listed
- * as clobbers. In this case only "r1" is used by the current routines.
- * r8/r9 are already listed as err/val.
- */
-
-#define __put_kernel_asm(stx,x,ptr)                         \
-	__asm__ __volatile__ (                              \
-		"\n1:\t" stx "\t%2,0(%1)\n\t"		    \
-		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\
-		: "=r"(__pu_err)                            \
-		: "r"(ptr), "r"(x), "0"(__pu_err)	    \
-	    	: "r1")
-
-#define __put_user_asm(stx,x,ptr)                           \
-	__asm__ __volatile__ (                              \
-		"\n1:\t" stx "\t%2,0(%%sr3,%1)\n\t"	    \
-		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\
-		: "=r"(__pu_err)                            \
-		: "r"(ptr), "r"(x), "0"(__pu_err)	    \
-		: "r1")
-
-
-#if !defined(CONFIG_64BIT)
-
-#define __put_kernel_asm64(__val,ptr) do {		    \
-	u64 __val64 = (u64)(__val);			    \
-	u32 hi = (__val64) >> 32;			    \
-	u32 lo = (__val64) & 0xffffffff;		    \
-	__asm__ __volatile__ (				    \
-		"\n1:\tstw %2,0(%1)"			    \
-		"\n2:\tstw %3,4(%1)\n\t"		    \
-		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\
-		ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\
-		: "=r"(__pu_err)                            \
-		: "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \
-		: "r1");				    \
-} while (0)
-
-#define __put_user_asm64(__val,ptr) do {	    	    \
-	u64 __val64 = (u64)(__val);			    \
-	u32 hi = (__val64) >> 32;			    \
-	u32 lo = (__val64) & 0xffffffff;		    \
-	__asm__ __volatile__ (				    \
-		"\n1:\tstw %2,0(%%sr3,%1)"		    \
-		"\n2:\tstw %3,4(%%sr3,%1)\n\t"		    \
-		ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\
-		ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\
-		: "=r"(__pu_err)                            \
-		: "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \
-		: "r1");				    \
-} while (0)
-
-#endif /* !defined(CONFIG_64BIT) */
-
-
-/*
- * Complex access routines -- external declarations
- */
-
-extern unsigned long lcopy_to_user(void __user *, const void *, unsigned long);
-extern unsigned long lcopy_from_user(void *, const void __user *, unsigned long);
-extern unsigned long lcopy_in_user(void __user *, const void __user *, unsigned long);
-extern long lstrncpy_from_user(char *, const char __user *, long);
-extern unsigned lclear_user(void __user *,unsigned long);
-extern long lstrnlen_user(const char __user *,long);
-
-/*
- * Complex access routines -- macros
- */
-
-#define strncpy_from_user lstrncpy_from_user
-#define strnlen_user lstrnlen_user
-#define strlen_user(str) lstrnlen_user(str, 0x7fffffffL)
-#define clear_user lclear_user
-#define __clear_user lclear_user
-
-unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len);
-#define __copy_to_user copy_to_user
-unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len);
-#define __copy_from_user copy_from_user
-unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len);
-#define __copy_in_user copy_in_user
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-#endif /* __PARISC_UACCESS_H */
diff --git a/include/asm-parisc/ucontext.h b/include/asm-parisc/ucontext.h
deleted file mode 100644
index 6c8883e4b0bd..000000000000
--- a/include/asm-parisc/ucontext.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_PARISC_UCONTEXT_H
-#define _ASM_PARISC_UCONTEXT_H
-
-struct ucontext {
-	unsigned int	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct sigcontext uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif /* !_ASM_PARISC_UCONTEXT_H */
diff --git a/include/asm-parisc/unaligned.h b/include/asm-parisc/unaligned.h
deleted file mode 100644
index dfc5d3321a54..000000000000
--- a/include/asm-parisc/unaligned.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_PARISC_UNALIGNED_H
-#define _ASM_PARISC_UNALIGNED_H
-
-#include <linux/unaligned/be_struct.h>
-#include <linux/unaligned/le_byteshift.h>
-#include <linux/unaligned/generic.h>
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#ifdef __KERNEL__
-struct pt_regs;
-void handle_unaligned(struct pt_regs *regs);
-int check_unaligned(struct pt_regs *regs);
-#endif
-
-#endif /* _ASM_PARISC_UNALIGNED_H */
diff --git a/include/asm-parisc/unistd.h b/include/asm-parisc/unistd.h
deleted file mode 100644
index a7d857f0e4f4..000000000000
--- a/include/asm-parisc/unistd.h
+++ /dev/null
@@ -1,991 +0,0 @@
-#ifndef _ASM_PARISC_UNISTD_H_
-#define _ASM_PARISC_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-/*
- *   HP-UX system calls get their native numbers for binary compatibility.
- */
-
-#define __NR_HPUX_exit                    1
-#define __NR_HPUX_fork                    2
-#define __NR_HPUX_read                    3
-#define __NR_HPUX_write                   4
-#define __NR_HPUX_open                    5
-#define __NR_HPUX_close                   6
-#define __NR_HPUX_wait                    7
-#define __NR_HPUX_creat                   8
-#define __NR_HPUX_link                    9
-#define __NR_HPUX_unlink                 10
-#define __NR_HPUX_execv                  11
-#define __NR_HPUX_chdir                  12
-#define __NR_HPUX_time                   13
-#define __NR_HPUX_mknod                  14
-#define __NR_HPUX_chmod                  15
-#define __NR_HPUX_chown                  16
-#define __NR_HPUX_break                  17
-#define __NR_HPUX_lchmod                 18
-#define __NR_HPUX_lseek                  19
-#define __NR_HPUX_getpid                 20
-#define __NR_HPUX_mount                  21
-#define __NR_HPUX_umount                 22
-#define __NR_HPUX_setuid                 23
-#define __NR_HPUX_getuid                 24
-#define __NR_HPUX_stime                  25
-#define __NR_HPUX_ptrace                 26
-#define __NR_HPUX_alarm                  27
-#define __NR_HPUX_oldfstat               28
-#define __NR_HPUX_pause                  29
-#define __NR_HPUX_utime                  30
-#define __NR_HPUX_stty                   31
-#define __NR_HPUX_gtty                   32
-#define __NR_HPUX_access                 33
-#define __NR_HPUX_nice                   34
-#define __NR_HPUX_ftime                  35
-#define __NR_HPUX_sync                   36
-#define __NR_HPUX_kill                   37
-#define __NR_HPUX_stat                   38
-#define __NR_HPUX_setpgrp3               39
-#define __NR_HPUX_lstat                  40
-#define __NR_HPUX_dup                    41
-#define __NR_HPUX_pipe                   42
-#define __NR_HPUX_times                  43
-#define __NR_HPUX_profil                 44
-#define __NR_HPUX_ki_call                45
-#define __NR_HPUX_setgid                 46
-#define __NR_HPUX_getgid                 47
-#define __NR_HPUX_sigsys                 48
-#define __NR_HPUX_reserved1              49
-#define __NR_HPUX_reserved2              50
-#define __NR_HPUX_acct                   51
-#define __NR_HPUX_set_userthreadid       52
-#define __NR_HPUX_oldlock                53
-#define __NR_HPUX_ioctl                  54
-#define __NR_HPUX_reboot                 55
-#define __NR_HPUX_symlink                56
-#define __NR_HPUX_utssys                 57
-#define __NR_HPUX_readlink               58
-#define __NR_HPUX_execve                 59
-#define __NR_HPUX_umask                  60
-#define __NR_HPUX_chroot                 61
-#define __NR_HPUX_fcntl                  62
-#define __NR_HPUX_ulimit                 63
-#define __NR_HPUX_getpagesize            64
-#define __NR_HPUX_mremap                 65
-#define __NR_HPUX_vfork                  66
-#define __NR_HPUX_vread                  67
-#define __NR_HPUX_vwrite                 68
-#define __NR_HPUX_sbrk                   69
-#define __NR_HPUX_sstk                   70
-#define __NR_HPUX_mmap                   71
-#define __NR_HPUX_vadvise                72
-#define __NR_HPUX_munmap                 73
-#define __NR_HPUX_mprotect               74
-#define __NR_HPUX_madvise                75
-#define __NR_HPUX_vhangup                76
-#define __NR_HPUX_swapoff                77
-#define __NR_HPUX_mincore                78
-#define __NR_HPUX_getgroups              79
-#define __NR_HPUX_setgroups              80
-#define __NR_HPUX_getpgrp2               81
-#define __NR_HPUX_setpgrp2               82
-#define __NR_HPUX_setitimer              83
-#define __NR_HPUX_wait3                  84
-#define __NR_HPUX_swapon                 85
-#define __NR_HPUX_getitimer              86
-#define __NR_HPUX_gethostname42          87
-#define __NR_HPUX_sethostname42          88
-#define __NR_HPUX_getdtablesize          89
-#define __NR_HPUX_dup2                   90
-#define __NR_HPUX_getdopt                91
-#define __NR_HPUX_fstat                  92
-#define __NR_HPUX_select                 93
-#define __NR_HPUX_setdopt                94
-#define __NR_HPUX_fsync                  95
-#define __NR_HPUX_setpriority            96
-#define __NR_HPUX_socket_old             97
-#define __NR_HPUX_connect_old            98
-#define __NR_HPUX_accept_old             99
-#define __NR_HPUX_getpriority           100
-#define __NR_HPUX_send_old              101
-#define __NR_HPUX_recv_old              102
-#define __NR_HPUX_socketaddr_old        103
-#define __NR_HPUX_bind_old              104
-#define __NR_HPUX_setsockopt_old        105
-#define __NR_HPUX_listen_old            106
-#define __NR_HPUX_vtimes_old            107
-#define __NR_HPUX_sigvector             108
-#define __NR_HPUX_sigblock              109
-#define __NR_HPUX_siggetmask            110
-#define __NR_HPUX_sigpause              111
-#define __NR_HPUX_sigstack              112
-#define __NR_HPUX_recvmsg_old           113
-#define __NR_HPUX_sendmsg_old           114
-#define __NR_HPUX_vtrace_old            115
-#define __NR_HPUX_gettimeofday          116
-#define __NR_HPUX_getrusage             117
-#define __NR_HPUX_getsockopt_old        118
-#define __NR_HPUX_resuba_old            119
-#define __NR_HPUX_readv                 120
-#define __NR_HPUX_writev                121
-#define __NR_HPUX_settimeofday          122
-#define __NR_HPUX_fchown                123
-#define __NR_HPUX_fchmod                124
-#define __NR_HPUX_recvfrom_old          125
-#define __NR_HPUX_setresuid             126
-#define __NR_HPUX_setresgid             127
-#define __NR_HPUX_rename                128
-#define __NR_HPUX_truncate              129
-#define __NR_HPUX_ftruncate             130
-#define __NR_HPUX_flock_old             131
-#define __NR_HPUX_sysconf               132
-#define __NR_HPUX_sendto_old            133
-#define __NR_HPUX_shutdown_old          134
-#define __NR_HPUX_socketpair_old        135
-#define __NR_HPUX_mkdir                 136
-#define __NR_HPUX_rmdir                 137
-#define __NR_HPUX_utimes_old            138
-#define __NR_HPUX_sigcleanup_old        139
-#define __NR_HPUX_setcore               140
-#define __NR_HPUX_getpeername_old       141
-#define __NR_HPUX_gethostid             142
-#define __NR_HPUX_sethostid             143
-#define __NR_HPUX_getrlimit             144
-#define __NR_HPUX_setrlimit             145
-#define __NR_HPUX_killpg_old            146
-#define __NR_HPUX_cachectl              147
-#define __NR_HPUX_quotactl              148
-#define __NR_HPUX_get_sysinfo           149
-#define __NR_HPUX_getsockname_old       150
-#define __NR_HPUX_privgrp               151
-#define __NR_HPUX_rtprio                152
-#define __NR_HPUX_plock                 153
-#define __NR_HPUX_reserved3             154
-#define __NR_HPUX_lockf                 155
-#define __NR_HPUX_semget                156
-#define __NR_HPUX_osemctl               157
-#define __NR_HPUX_semop                 158
-#define __NR_HPUX_msgget                159
-#define __NR_HPUX_omsgctl               160
-#define __NR_HPUX_msgsnd                161
-#define __NR_HPUX_msgrecv               162
-#define __NR_HPUX_shmget                163
-#define __NR_HPUX_oshmctl               164
-#define __NR_HPUX_shmat                 165
-#define __NR_HPUX_shmdt                 166
-#define __NR_HPUX_m68020_advise         167
-/* [168,189] are for Discless/DUX */
-#define __NR_HPUX_csp                   168
-#define __NR_HPUX_cluster               169
-#define __NR_HPUX_mkrnod                170
-#define __NR_HPUX_test                  171
-#define __NR_HPUX_unsp_open             172
-#define __NR_HPUX_reserved4             173
-#define __NR_HPUX_getcontext_old        174
-#define __NR_HPUX_osetcontext           175
-#define __NR_HPUX_bigio                 176
-#define __NR_HPUX_pipenode              177
-#define __NR_HPUX_lsync                 178
-#define __NR_HPUX_getmachineid          179
-#define __NR_HPUX_cnodeid               180
-#define __NR_HPUX_cnodes                181
-#define __NR_HPUX_swapclients           182
-#define __NR_HPUX_rmt_process           183
-#define __NR_HPUX_dskless_stats         184
-#define __NR_HPUX_sigprocmask           185
-#define __NR_HPUX_sigpending            186
-#define __NR_HPUX_sigsuspend            187
-#define __NR_HPUX_sigaction             188
-#define __NR_HPUX_reserved5             189
-#define __NR_HPUX_nfssvc                190
-#define __NR_HPUX_getfh                 191
-#define __NR_HPUX_getdomainname         192
-#define __NR_HPUX_setdomainname         193
-#define __NR_HPUX_async_daemon          194
-#define __NR_HPUX_getdirentries         195
-#define __NR_HPUX_statfs                196
-#define __NR_HPUX_fstatfs               197
-#define __NR_HPUX_vfsmount              198
-#define __NR_HPUX_reserved6             199
-#define __NR_HPUX_waitpid               200
-/* 201 - 223 missing */
-#define __NR_HPUX_sigsetreturn          224
-#define __NR_HPUX_sigsetstatemask       225
-/* 226 missing */
-#define __NR_HPUX_cs                    227
-#define __NR_HPUX_cds                   228
-#define __NR_HPUX_set_no_trunc          229
-#define __NR_HPUX_pathconf              230
-#define __NR_HPUX_fpathconf             231
-/* 232, 233 missing */
-#define __NR_HPUX_nfs_fcntl             234
-#define __NR_HPUX_ogetacl               235
-#define __NR_HPUX_ofgetacl              236
-#define __NR_HPUX_osetacl               237
-#define __NR_HPUX_ofsetacl              238
-#define __NR_HPUX_pstat                 239
-#define __NR_HPUX_getaudid              240
-#define __NR_HPUX_setaudid              241
-#define __NR_HPUX_getaudproc            242
-#define __NR_HPUX_setaudproc            243
-#define __NR_HPUX_getevent              244
-#define __NR_HPUX_setevent              245
-#define __NR_HPUX_audwrite              246
-#define __NR_HPUX_audswitch             247
-#define __NR_HPUX_audctl                248
-#define __NR_HPUX_ogetaccess            249
-#define __NR_HPUX_fsctl                 250
-/* 251 - 258 missing */
-#define __NR_HPUX_swapfs                259
-#define __NR_HPUX_fss                   260
-/* 261 - 266 missing */
-#define __NR_HPUX_tsync                 267
-#define __NR_HPUX_getnumfds             268
-#define __NR_HPUX_poll                  269
-#define __NR_HPUX_getmsg                270
-#define __NR_HPUX_putmsg                271
-#define __NR_HPUX_fchdir                272
-#define __NR_HPUX_getmount_cnt          273
-#define __NR_HPUX_getmount_entry        274
-#define __NR_HPUX_accept                275
-#define __NR_HPUX_bind                  276
-#define __NR_HPUX_connect               277
-#define __NR_HPUX_getpeername           278
-#define __NR_HPUX_getsockname           279
-#define __NR_HPUX_getsockopt            280
-#define __NR_HPUX_listen                281
-#define __NR_HPUX_recv                  282
-#define __NR_HPUX_recvfrom              283
-#define __NR_HPUX_recvmsg               284
-#define __NR_HPUX_send                  285
-#define __NR_HPUX_sendmsg               286
-#define __NR_HPUX_sendto                287
-#define __NR_HPUX_setsockopt            288
-#define __NR_HPUX_shutdown              289
-#define __NR_HPUX_socket                290
-#define __NR_HPUX_socketpair            291
-#define __NR_HPUX_proc_open             292
-#define __NR_HPUX_proc_close            293
-#define __NR_HPUX_proc_send             294
-#define __NR_HPUX_proc_recv             295
-#define __NR_HPUX_proc_sendrecv         296
-#define __NR_HPUX_proc_syscall          297
-/* 298 - 311 missing */
-#define __NR_HPUX_semctl                312
-#define __NR_HPUX_msgctl                313
-#define __NR_HPUX_shmctl                314
-#define __NR_HPUX_mpctl                 315
-#define __NR_HPUX_exportfs              316
-#define __NR_HPUX_getpmsg               317
-#define __NR_HPUX_putpmsg               318
-/* 319 missing */
-#define __NR_HPUX_msync                 320
-#define __NR_HPUX_msleep                321
-#define __NR_HPUX_mwakeup               322
-#define __NR_HPUX_msem_init             323
-#define __NR_HPUX_msem_remove           324
-#define __NR_HPUX_adjtime               325
-#define __NR_HPUX_kload                 326
-#define __NR_HPUX_fattach               327
-#define __NR_HPUX_fdetach               328
-#define __NR_HPUX_serialize             329
-#define __NR_HPUX_statvfs               330
-#define __NR_HPUX_fstatvfs              331
-#define __NR_HPUX_lchown                332
-#define __NR_HPUX_getsid                333
-#define __NR_HPUX_sysfs                 334
-/* 335, 336 missing */
-#define __NR_HPUX_sched_setparam        337
-#define __NR_HPUX_sched_getparam        338
-#define __NR_HPUX_sched_setscheduler    339
-#define __NR_HPUX_sched_getscheduler    340
-#define __NR_HPUX_sched_yield           341
-#define __NR_HPUX_sched_get_priority_max 342
-#define __NR_HPUX_sched_get_priority_min 343
-#define __NR_HPUX_sched_rr_get_interval 344
-#define __NR_HPUX_clock_settime         345
-#define __NR_HPUX_clock_gettime         346
-#define __NR_HPUX_clock_getres          347
-#define __NR_HPUX_timer_create          348
-#define __NR_HPUX_timer_delete          349
-#define __NR_HPUX_timer_settime         350
-#define __NR_HPUX_timer_gettime         351
-#define __NR_HPUX_timer_getoverrun      352
-#define __NR_HPUX_nanosleep             353
-#define __NR_HPUX_toolbox               354
-/* 355 missing */
-#define __NR_HPUX_getdents              356
-#define __NR_HPUX_getcontext            357
-#define __NR_HPUX_sysinfo               358
-#define __NR_HPUX_fcntl64               359
-#define __NR_HPUX_ftruncate64           360
-#define __NR_HPUX_fstat64               361
-#define __NR_HPUX_getdirentries64       362
-#define __NR_HPUX_getrlimit64           363
-#define __NR_HPUX_lockf64               364
-#define __NR_HPUX_lseek64               365
-#define __NR_HPUX_lstat64               366
-#define __NR_HPUX_mmap64                367
-#define __NR_HPUX_setrlimit64           368
-#define __NR_HPUX_stat64                369
-#define __NR_HPUX_truncate64            370
-#define __NR_HPUX_ulimit64              371
-#define __NR_HPUX_pread                 372
-#define __NR_HPUX_preadv                373
-#define __NR_HPUX_pwrite                374
-#define __NR_HPUX_pwritev               375
-#define __NR_HPUX_pread64               376
-#define __NR_HPUX_preadv64              377
-#define __NR_HPUX_pwrite64              378
-#define __NR_HPUX_pwritev64             379
-#define __NR_HPUX_setcontext            380
-#define __NR_HPUX_sigaltstack           381
-#define __NR_HPUX_waitid                382
-#define __NR_HPUX_setpgrp               383
-#define __NR_HPUX_recvmsg2              384
-#define __NR_HPUX_sendmsg2              385
-#define __NR_HPUX_socket2               386
-#define __NR_HPUX_socketpair2           387
-#define __NR_HPUX_setregid              388
-#define __NR_HPUX_lwp_create            389
-#define __NR_HPUX_lwp_terminate         390
-#define __NR_HPUX_lwp_wait              391
-#define __NR_HPUX_lwp_suspend           392
-#define __NR_HPUX_lwp_resume            393
-/* 394 missing */
-#define __NR_HPUX_lwp_abort_syscall     395
-#define __NR_HPUX_lwp_info              396
-#define __NR_HPUX_lwp_kill              397
-#define __NR_HPUX_ksleep                398
-#define __NR_HPUX_kwakeup               399
-/* 400 missing */
-#define __NR_HPUX_pstat_getlwp          401
-#define __NR_HPUX_lwp_exit              402
-#define __NR_HPUX_lwp_continue          403
-#define __NR_HPUX_getacl                404
-#define __NR_HPUX_fgetacl               405
-#define __NR_HPUX_setacl                406
-#define __NR_HPUX_fsetacl               407
-#define __NR_HPUX_getaccess             408
-#define __NR_HPUX_lwp_mutex_init        409
-#define __NR_HPUX_lwp_mutex_lock_sys    410
-#define __NR_HPUX_lwp_mutex_unlock      411
-#define __NR_HPUX_lwp_cond_init         412
-#define __NR_HPUX_lwp_cond_signal       413
-#define __NR_HPUX_lwp_cond_broadcast    414
-#define __NR_HPUX_lwp_cond_wait_sys     415
-#define __NR_HPUX_lwp_getscheduler      416
-#define __NR_HPUX_lwp_setscheduler      417
-#define __NR_HPUX_lwp_getstate          418
-#define __NR_HPUX_lwp_setstate          419
-#define __NR_HPUX_lwp_detach            420
-#define __NR_HPUX_mlock                 421
-#define __NR_HPUX_munlock               422
-#define __NR_HPUX_mlockall              423
-#define __NR_HPUX_munlockall            424
-#define __NR_HPUX_shm_open              425
-#define __NR_HPUX_shm_unlink            426
-#define __NR_HPUX_sigqueue              427
-#define __NR_HPUX_sigwaitinfo           428
-#define __NR_HPUX_sigtimedwait          429
-#define __NR_HPUX_sigwait               430
-#define __NR_HPUX_aio_read              431
-#define __NR_HPUX_aio_write             432
-#define __NR_HPUX_lio_listio            433
-#define __NR_HPUX_aio_error             434
-#define __NR_HPUX_aio_return            435
-#define __NR_HPUX_aio_cancel            436
-#define __NR_HPUX_aio_suspend           437
-#define __NR_HPUX_aio_fsync             438
-#define __NR_HPUX_mq_open               439
-#define __NR_HPUX_mq_close              440
-#define __NR_HPUX_mq_unlink             441
-#define __NR_HPUX_mq_send               442
-#define __NR_HPUX_mq_receive            443
-#define __NR_HPUX_mq_notify             444
-#define __NR_HPUX_mq_setattr            445
-#define __NR_HPUX_mq_getattr            446
-#define __NR_HPUX_ksem_open             447
-#define __NR_HPUX_ksem_unlink           448
-#define __NR_HPUX_ksem_close            449
-#define __NR_HPUX_ksem_post             450
-#define __NR_HPUX_ksem_wait             451
-#define __NR_HPUX_ksem_read             452
-#define __NR_HPUX_ksem_trywait          453
-#define __NR_HPUX_lwp_rwlock_init       454
-#define __NR_HPUX_lwp_rwlock_destroy    455
-#define __NR_HPUX_lwp_rwlock_rdlock_sys 456
-#define __NR_HPUX_lwp_rwlock_wrlock_sys 457
-#define __NR_HPUX_lwp_rwlock_tryrdlock  458
-#define __NR_HPUX_lwp_rwlock_trywrlock  459
-#define __NR_HPUX_lwp_rwlock_unlock     460
-#define __NR_HPUX_ttrace                461
-#define __NR_HPUX_ttrace_wait           462
-#define __NR_HPUX_lf_wire_mem           463
-#define __NR_HPUX_lf_unwire_mem         464
-#define __NR_HPUX_lf_send_pin_map       465
-#define __NR_HPUX_lf_free_buf           466
-#define __NR_HPUX_lf_wait_nq            467
-#define __NR_HPUX_lf_wakeup_conn_q      468
-#define __NR_HPUX_lf_unused             469
-#define __NR_HPUX_lwp_sema_init         470
-#define __NR_HPUX_lwp_sema_post         471
-#define __NR_HPUX_lwp_sema_wait         472
-#define __NR_HPUX_lwp_sema_trywait      473
-#define __NR_HPUX_lwp_sema_destroy      474
-#define __NR_HPUX_statvfs64             475
-#define __NR_HPUX_fstatvfs64            476
-#define __NR_HPUX_msh_register          477
-#define __NR_HPUX_ptrace64              478
-#define __NR_HPUX_sendfile              479
-#define __NR_HPUX_sendpath              480
-#define __NR_HPUX_sendfile64            481
-#define __NR_HPUX_sendpath64            482
-#define __NR_HPUX_modload               483
-#define __NR_HPUX_moduload              484
-#define __NR_HPUX_modpath               485
-#define __NR_HPUX_getksym               486
-#define __NR_HPUX_modadm                487
-#define __NR_HPUX_modstat               488
-#define __NR_HPUX_lwp_detached_exit     489
-#define __NR_HPUX_crashconf             490
-#define __NR_HPUX_siginhibit            491
-#define __NR_HPUX_sigenable             492
-#define __NR_HPUX_spuctl                493
-#define __NR_HPUX_zerokernelsum         494
-#define __NR_HPUX_nfs_kstat             495
-#define __NR_HPUX_aio_read64            496
-#define __NR_HPUX_aio_write64           497
-#define __NR_HPUX_aio_error64           498
-#define __NR_HPUX_aio_return64          499
-#define __NR_HPUX_aio_cancel64          500
-#define __NR_HPUX_aio_suspend64         501
-#define __NR_HPUX_aio_fsync64           502
-#define __NR_HPUX_lio_listio64          503
-#define __NR_HPUX_recv2                 504
-#define __NR_HPUX_recvfrom2             505
-#define __NR_HPUX_send2                 506
-#define __NR_HPUX_sendto2               507
-#define __NR_HPUX_acl                   508
-#define __NR_HPUX___cnx_p2p_ctl         509
-#define __NR_HPUX___cnx_gsched_ctl      510
-#define __NR_HPUX___cnx_pmon_ctl        511
-
-#define __NR_HPUX_syscalls		512
-
-/*
- * Linux system call numbers.
- *
- * Cary Coutant says that we should just use another syscall gateway
- * page to avoid clashing with the HPUX space, and I think he's right:
- * it will would keep a branch out of our syscall entry path, at the
- * very least.  If we decide to change it later, we can ``just'' tweak
- * the LINUX_GATEWAY_ADDR define at the bottom and make __NR_Linux be
- * 1024 or something.  Oh, and recompile libc. =)
- *
- * 64-bit HPUX binaries get the syscall gateway address passed in a register
- * from the kernel at startup, which seems a sane strategy.
- */
-
-#define __NR_Linux                0
-#define __NR_restart_syscall      (__NR_Linux + 0)
-#define __NR_exit                 (__NR_Linux + 1)
-#define __NR_fork                 (__NR_Linux + 2)
-#define __NR_read                 (__NR_Linux + 3)
-#define __NR_write                (__NR_Linux + 4)
-#define __NR_open                 (__NR_Linux + 5)
-#define __NR_close                (__NR_Linux + 6)
-#define __NR_waitpid              (__NR_Linux + 7)
-#define __NR_creat                (__NR_Linux + 8)
-#define __NR_link                 (__NR_Linux + 9)
-#define __NR_unlink              (__NR_Linux + 10)
-#define __NR_execve              (__NR_Linux + 11)
-#define __NR_chdir               (__NR_Linux + 12)
-#define __NR_time                (__NR_Linux + 13)
-#define __NR_mknod               (__NR_Linux + 14)
-#define __NR_chmod               (__NR_Linux + 15)
-#define __NR_lchown              (__NR_Linux + 16)
-#define __NR_socket              (__NR_Linux + 17)
-#define __NR_stat                (__NR_Linux + 18)
-#define __NR_lseek               (__NR_Linux + 19)
-#define __NR_getpid              (__NR_Linux + 20)
-#define __NR_mount               (__NR_Linux + 21)
-#define __NR_bind                (__NR_Linux + 22)
-#define __NR_setuid              (__NR_Linux + 23)
-#define __NR_getuid              (__NR_Linux + 24)
-#define __NR_stime               (__NR_Linux + 25)
-#define __NR_ptrace              (__NR_Linux + 26)
-#define __NR_alarm               (__NR_Linux + 27)
-#define __NR_fstat               (__NR_Linux + 28)
-#define __NR_pause               (__NR_Linux + 29)
-#define __NR_utime               (__NR_Linux + 30)
-#define __NR_connect             (__NR_Linux + 31)
-#define __NR_listen              (__NR_Linux + 32)
-#define __NR_access              (__NR_Linux + 33)
-#define __NR_nice                (__NR_Linux + 34)
-#define __NR_accept              (__NR_Linux + 35)
-#define __NR_sync                (__NR_Linux + 36)
-#define __NR_kill                (__NR_Linux + 37)
-#define __NR_rename              (__NR_Linux + 38)
-#define __NR_mkdir               (__NR_Linux + 39)
-#define __NR_rmdir               (__NR_Linux + 40)
-#define __NR_dup                 (__NR_Linux + 41)
-#define __NR_pipe                (__NR_Linux + 42)
-#define __NR_times               (__NR_Linux + 43)
-#define __NR_getsockname         (__NR_Linux + 44)
-#define __NR_brk                 (__NR_Linux + 45)
-#define __NR_setgid              (__NR_Linux + 46)
-#define __NR_getgid              (__NR_Linux + 47)
-#define __NR_signal              (__NR_Linux + 48)
-#define __NR_geteuid             (__NR_Linux + 49)
-#define __NR_getegid             (__NR_Linux + 50)
-#define __NR_acct                (__NR_Linux + 51)
-#define __NR_umount2             (__NR_Linux + 52)
-#define __NR_getpeername         (__NR_Linux + 53)
-#define __NR_ioctl               (__NR_Linux + 54)
-#define __NR_fcntl               (__NR_Linux + 55)
-#define __NR_socketpair          (__NR_Linux + 56)
-#define __NR_setpgid             (__NR_Linux + 57)
-#define __NR_send                (__NR_Linux + 58)
-#define __NR_uname               (__NR_Linux + 59)
-#define __NR_umask               (__NR_Linux + 60)
-#define __NR_chroot              (__NR_Linux + 61)
-#define __NR_ustat               (__NR_Linux + 62)
-#define __NR_dup2                (__NR_Linux + 63)
-#define __NR_getppid             (__NR_Linux + 64)
-#define __NR_getpgrp             (__NR_Linux + 65)
-#define __NR_setsid              (__NR_Linux + 66)
-#define __NR_pivot_root          (__NR_Linux + 67)
-#define __NR_sgetmask            (__NR_Linux + 68)
-#define __NR_ssetmask            (__NR_Linux + 69)
-#define __NR_setreuid            (__NR_Linux + 70)
-#define __NR_setregid            (__NR_Linux + 71)
-#define __NR_mincore             (__NR_Linux + 72)
-#define __NR_sigpending          (__NR_Linux + 73)
-#define __NR_sethostname         (__NR_Linux + 74)
-#define __NR_setrlimit           (__NR_Linux + 75)
-#define __NR_getrlimit           (__NR_Linux + 76)
-#define __NR_getrusage           (__NR_Linux + 77)
-#define __NR_gettimeofday        (__NR_Linux + 78)
-#define __NR_settimeofday        (__NR_Linux + 79)
-#define __NR_getgroups           (__NR_Linux + 80)
-#define __NR_setgroups           (__NR_Linux + 81)
-#define __NR_sendto              (__NR_Linux + 82)
-#define __NR_symlink             (__NR_Linux + 83)
-#define __NR_lstat               (__NR_Linux + 84)
-#define __NR_readlink            (__NR_Linux + 85)
-#define __NR_uselib              (__NR_Linux + 86)
-#define __NR_swapon              (__NR_Linux + 87)
-#define __NR_reboot              (__NR_Linux + 88)
-#define __NR_mmap2             (__NR_Linux + 89)
-#define __NR_mmap                (__NR_Linux + 90)
-#define __NR_munmap              (__NR_Linux + 91)
-#define __NR_truncate            (__NR_Linux + 92)
-#define __NR_ftruncate           (__NR_Linux + 93)
-#define __NR_fchmod              (__NR_Linux + 94)
-#define __NR_fchown              (__NR_Linux + 95)
-#define __NR_getpriority         (__NR_Linux + 96)
-#define __NR_setpriority         (__NR_Linux + 97)
-#define __NR_recv                (__NR_Linux + 98)
-#define __NR_statfs              (__NR_Linux + 99)
-#define __NR_fstatfs            (__NR_Linux + 100)
-#define __NR_stat64           (__NR_Linux + 101)
-/* #define __NR_socketcall         (__NR_Linux + 102) */
-#define __NR_syslog             (__NR_Linux + 103)
-#define __NR_setitimer          (__NR_Linux + 104)
-#define __NR_getitimer          (__NR_Linux + 105)
-#define __NR_capget             (__NR_Linux + 106)
-#define __NR_capset             (__NR_Linux + 107)
-#define __NR_pread64            (__NR_Linux + 108)
-#define __NR_pwrite64           (__NR_Linux + 109)
-#define __NR_getcwd             (__NR_Linux + 110)
-#define __NR_vhangup            (__NR_Linux + 111)
-#define __NR_fstat64            (__NR_Linux + 112)
-#define __NR_vfork              (__NR_Linux + 113)
-#define __NR_wait4              (__NR_Linux + 114)
-#define __NR_swapoff            (__NR_Linux + 115)
-#define __NR_sysinfo            (__NR_Linux + 116)
-#define __NR_shutdown           (__NR_Linux + 117)
-#define __NR_fsync              (__NR_Linux + 118)
-#define __NR_madvise            (__NR_Linux + 119)
-#define __NR_clone              (__NR_Linux + 120)
-#define __NR_setdomainname      (__NR_Linux + 121)
-#define __NR_sendfile           (__NR_Linux + 122)
-#define __NR_recvfrom           (__NR_Linux + 123)
-#define __NR_adjtimex           (__NR_Linux + 124)
-#define __NR_mprotect           (__NR_Linux + 125)
-#define __NR_sigprocmask        (__NR_Linux + 126)
-#define __NR_create_module      (__NR_Linux + 127)
-#define __NR_init_module        (__NR_Linux + 128)
-#define __NR_delete_module      (__NR_Linux + 129)
-#define __NR_get_kernel_syms    (__NR_Linux + 130)
-#define __NR_quotactl           (__NR_Linux + 131)
-#define __NR_getpgid            (__NR_Linux + 132)
-#define __NR_fchdir             (__NR_Linux + 133)
-#define __NR_bdflush            (__NR_Linux + 134)
-#define __NR_sysfs              (__NR_Linux + 135)
-#define __NR_personality        (__NR_Linux + 136)
-#define __NR_afs_syscall        (__NR_Linux + 137) /* Syscall for Andrew File System */
-#define __NR_setfsuid           (__NR_Linux + 138)
-#define __NR_setfsgid           (__NR_Linux + 139)
-#define __NR__llseek            (__NR_Linux + 140)
-#define __NR_getdents           (__NR_Linux + 141)
-#define __NR__newselect         (__NR_Linux + 142)
-#define __NR_flock              (__NR_Linux + 143)
-#define __NR_msync              (__NR_Linux + 144)
-#define __NR_readv              (__NR_Linux + 145)
-#define __NR_writev             (__NR_Linux + 146)
-#define __NR_getsid             (__NR_Linux + 147)
-#define __NR_fdatasync          (__NR_Linux + 148)
-#define __NR__sysctl            (__NR_Linux + 149)
-#define __NR_mlock              (__NR_Linux + 150)
-#define __NR_munlock            (__NR_Linux + 151)
-#define __NR_mlockall           (__NR_Linux + 152)
-#define __NR_munlockall         (__NR_Linux + 153)
-#define __NR_sched_setparam             (__NR_Linux + 154)
-#define __NR_sched_getparam             (__NR_Linux + 155)
-#define __NR_sched_setscheduler         (__NR_Linux + 156)
-#define __NR_sched_getscheduler         (__NR_Linux + 157)
-#define __NR_sched_yield                (__NR_Linux + 158)
-#define __NR_sched_get_priority_max     (__NR_Linux + 159)
-#define __NR_sched_get_priority_min     (__NR_Linux + 160)
-#define __NR_sched_rr_get_interval      (__NR_Linux + 161)
-#define __NR_nanosleep          (__NR_Linux + 162)
-#define __NR_mremap             (__NR_Linux + 163)
-#define __NR_setresuid          (__NR_Linux + 164)
-#define __NR_getresuid          (__NR_Linux + 165)
-#define __NR_sigaltstack        (__NR_Linux + 166)
-#define __NR_query_module       (__NR_Linux + 167)
-#define __NR_poll               (__NR_Linux + 168)
-#define __NR_nfsservctl         (__NR_Linux + 169)
-#define __NR_setresgid          (__NR_Linux + 170)
-#define __NR_getresgid          (__NR_Linux + 171)
-#define __NR_prctl              (__NR_Linux + 172)
-#define __NR_rt_sigreturn       (__NR_Linux + 173)
-#define __NR_rt_sigaction       (__NR_Linux + 174)
-#define __NR_rt_sigprocmask     (__NR_Linux + 175)
-#define __NR_rt_sigpending      (__NR_Linux + 176)
-#define __NR_rt_sigtimedwait    (__NR_Linux + 177)
-#define __NR_rt_sigqueueinfo    (__NR_Linux + 178)
-#define __NR_rt_sigsuspend      (__NR_Linux + 179)
-#define __NR_chown              (__NR_Linux + 180)
-#define __NR_setsockopt         (__NR_Linux + 181)
-#define __NR_getsockopt         (__NR_Linux + 182)
-#define __NR_sendmsg            (__NR_Linux + 183)
-#define __NR_recvmsg            (__NR_Linux + 184)
-#define __NR_semop              (__NR_Linux + 185)
-#define __NR_semget             (__NR_Linux + 186)
-#define __NR_semctl             (__NR_Linux + 187)
-#define __NR_msgsnd             (__NR_Linux + 188)
-#define __NR_msgrcv             (__NR_Linux + 189)
-#define __NR_msgget             (__NR_Linux + 190)
-#define __NR_msgctl             (__NR_Linux + 191)
-#define __NR_shmat              (__NR_Linux + 192)
-#define __NR_shmdt              (__NR_Linux + 193)
-#define __NR_shmget             (__NR_Linux + 194)
-#define __NR_shmctl             (__NR_Linux + 195)
-
-#define __NR_getpmsg		(__NR_Linux + 196) /* Somebody *wants* streams? */
-#define __NR_putpmsg		(__NR_Linux + 197)
-
-#define __NR_lstat64            (__NR_Linux + 198)
-#define __NR_truncate64         (__NR_Linux + 199)
-#define __NR_ftruncate64        (__NR_Linux + 200)
-#define __NR_getdents64         (__NR_Linux + 201)
-#define __NR_fcntl64            (__NR_Linux + 202)
-#define __NR_attrctl            (__NR_Linux + 203)
-#define __NR_acl_get            (__NR_Linux + 204)
-#define __NR_acl_set            (__NR_Linux + 205)
-#define __NR_gettid             (__NR_Linux + 206)
-#define __NR_readahead          (__NR_Linux + 207)
-#define __NR_tkill              (__NR_Linux + 208)
-#define __NR_sendfile64         (__NR_Linux + 209)
-#define __NR_futex              (__NR_Linux + 210)
-#define __NR_sched_setaffinity  (__NR_Linux + 211)
-#define __NR_sched_getaffinity  (__NR_Linux + 212)
-#define __NR_set_thread_area    (__NR_Linux + 213)
-#define __NR_get_thread_area    (__NR_Linux + 214)
-#define __NR_io_setup           (__NR_Linux + 215)
-#define __NR_io_destroy         (__NR_Linux + 216)
-#define __NR_io_getevents       (__NR_Linux + 217)
-#define __NR_io_submit          (__NR_Linux + 218)
-#define __NR_io_cancel          (__NR_Linux + 219)
-#define __NR_alloc_hugepages    (__NR_Linux + 220)
-#define __NR_free_hugepages     (__NR_Linux + 221)
-#define __NR_exit_group         (__NR_Linux + 222)
-#define __NR_lookup_dcookie     (__NR_Linux + 223)
-#define __NR_epoll_create       (__NR_Linux + 224)
-#define __NR_epoll_ctl          (__NR_Linux + 225)
-#define __NR_epoll_wait         (__NR_Linux + 226)
-#define __NR_remap_file_pages   (__NR_Linux + 227)
-#define __NR_semtimedop         (__NR_Linux + 228)
-#define __NR_mq_open            (__NR_Linux + 229)
-#define __NR_mq_unlink          (__NR_Linux + 230)
-#define __NR_mq_timedsend       (__NR_Linux + 231)
-#define __NR_mq_timedreceive    (__NR_Linux + 232)
-#define __NR_mq_notify          (__NR_Linux + 233)
-#define __NR_mq_getsetattr      (__NR_Linux + 234)
-#define __NR_waitid		(__NR_Linux + 235)
-#define __NR_fadvise64_64	(__NR_Linux + 236)
-#define __NR_set_tid_address	(__NR_Linux + 237)
-#define __NR_setxattr		(__NR_Linux + 238)
-#define __NR_lsetxattr		(__NR_Linux + 239)
-#define __NR_fsetxattr		(__NR_Linux + 240)
-#define __NR_getxattr		(__NR_Linux + 241)
-#define __NR_lgetxattr		(__NR_Linux + 242)
-#define __NR_fgetxattr		(__NR_Linux + 243)
-#define __NR_listxattr		(__NR_Linux + 244)
-#define __NR_llistxattr		(__NR_Linux + 245)
-#define __NR_flistxattr		(__NR_Linux + 246)
-#define __NR_removexattr	(__NR_Linux + 247)
-#define __NR_lremovexattr	(__NR_Linux + 248)
-#define __NR_fremovexattr	(__NR_Linux + 249)
-#define __NR_timer_create	(__NR_Linux + 250)
-#define __NR_timer_settime	(__NR_Linux + 251)
-#define __NR_timer_gettime	(__NR_Linux + 252)
-#define __NR_timer_getoverrun	(__NR_Linux + 253)
-#define __NR_timer_delete	(__NR_Linux + 254)
-#define __NR_clock_settime	(__NR_Linux + 255)
-#define __NR_clock_gettime	(__NR_Linux + 256)
-#define __NR_clock_getres	(__NR_Linux + 257)
-#define __NR_clock_nanosleep	(__NR_Linux + 258)
-#define __NR_tgkill		(__NR_Linux + 259)
-#define __NR_mbind		(__NR_Linux + 260)
-#define __NR_get_mempolicy	(__NR_Linux + 261)
-#define __NR_set_mempolicy	(__NR_Linux + 262)
-#define __NR_vserver		(__NR_Linux + 263)
-#define __NR_add_key		(__NR_Linux + 264)
-#define __NR_request_key	(__NR_Linux + 265)
-#define __NR_keyctl		(__NR_Linux + 266)
-#define __NR_ioprio_set		(__NR_Linux + 267)
-#define __NR_ioprio_get		(__NR_Linux + 268)
-#define __NR_inotify_init	(__NR_Linux + 269)
-#define __NR_inotify_add_watch	(__NR_Linux + 270)
-#define __NR_inotify_rm_watch	(__NR_Linux + 271)
-#define __NR_migrate_pages	(__NR_Linux + 272)
-#define __NR_pselect6		(__NR_Linux + 273)
-#define __NR_ppoll		(__NR_Linux + 274)
-#define __NR_openat		(__NR_Linux + 275)
-#define __NR_mkdirat		(__NR_Linux + 276)
-#define __NR_mknodat		(__NR_Linux + 277)
-#define __NR_fchownat		(__NR_Linux + 278)
-#define __NR_futimesat		(__NR_Linux + 279)
-#define __NR_fstatat64		(__NR_Linux + 280)
-#define __NR_unlinkat		(__NR_Linux + 281)
-#define __NR_renameat		(__NR_Linux + 282)
-#define __NR_linkat		(__NR_Linux + 283)
-#define __NR_symlinkat		(__NR_Linux + 284)
-#define __NR_readlinkat		(__NR_Linux + 285)
-#define __NR_fchmodat		(__NR_Linux + 286)
-#define __NR_faccessat		(__NR_Linux + 287)
-#define __NR_unshare		(__NR_Linux + 288)
-#define __NR_set_robust_list	(__NR_Linux + 289)
-#define __NR_get_robust_list	(__NR_Linux + 290)
-#define __NR_splice		(__NR_Linux + 291)
-#define __NR_sync_file_range	(__NR_Linux + 292)
-#define __NR_tee		(__NR_Linux + 293)
-#define __NR_vmsplice		(__NR_Linux + 294)
-#define __NR_move_pages		(__NR_Linux + 295)
-#define __NR_getcpu		(__NR_Linux + 296)
-#define __NR_epoll_pwait	(__NR_Linux + 297)
-#define __NR_statfs64		(__NR_Linux + 298)
-#define __NR_fstatfs64		(__NR_Linux + 299)
-#define __NR_kexec_load		(__NR_Linux + 300)
-#define __NR_utimensat		(__NR_Linux + 301)
-#define __NR_signalfd		(__NR_Linux + 302)
-#define __NR_timerfd		(__NR_Linux + 303)
-#define __NR_eventfd		(__NR_Linux + 304)
-#define __NR_fallocate		(__NR_Linux + 305)
-#define __NR_timerfd_create	(__NR_Linux + 306)
-#define __NR_timerfd_settime	(__NR_Linux + 307)
-#define __NR_timerfd_gettime	(__NR_Linux + 308)
-
-#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
-
-
-#define __IGNORE_select		/* newselect */
-#define __IGNORE_fadvise64	/* fadvise64_64 */
-#define __IGNORE_utimes		/* utime */
-
-
-#define HPUX_GATEWAY_ADDR       0xC0000004
-#define LINUX_GATEWAY_ADDR      0x100
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#define SYS_ify(syscall_name)   __NR_##syscall_name
-
-#ifndef ASM_LINE_SEP
-# define ASM_LINE_SEP ;
-#endif
-
-/* Definition taken from glibc 2.3.3
- * sysdeps/unix/sysv/linux/hppa/sysdep.h
- */
-
-#ifdef PIC
-/* WARNING: CANNOT BE USED IN A NOP! */
-# define K_STW_ASM_PIC	"       copy %%r19, %%r4\n"
-# define K_LDW_ASM_PIC	"       copy %%r4, %%r19\n"
-# define K_USING_GR4	"%r4",
-#else
-# define K_STW_ASM_PIC	" \n"
-# define K_LDW_ASM_PIC	" \n"
-# define K_USING_GR4
-#endif
-
-/* GCC has to be warned that a syscall may clobber all the ABI
-   registers listed as "caller-saves", see page 8, Table 2
-   in section 2.2.6 of the PA-RISC RUN-TIME architecture
-   document. However! r28 is the result and will conflict with
-   the clobber list so it is left out. Also the input arguments
-   registers r20 -> r26 will conflict with the list so they
-   are treated specially. Although r19 is clobbered by the syscall
-   we cannot say this because it would violate ABI, thus we say
-   r4 is clobbered and use that register to save/restore r19
-   across the syscall. */
-
-#define K_CALL_CLOB_REGS "%r1", "%r2", K_USING_GR4 \
-	        	 "%r20", "%r29", "%r31"
-
-#undef K_INLINE_SYSCALL
-#define K_INLINE_SYSCALL(name, nr, args...)	({			\
-	long __sys_res;							\
-	{								\
-		register unsigned long __res __asm__("r28");		\
-		K_LOAD_ARGS_##nr(args)					\
-		/* FIXME: HACK stw/ldw r19 around syscall */		\
-		__asm__ volatile(					\
-			K_STW_ASM_PIC					\
-			"	ble  0x100(%%sr2, %%r0)\n"		\
-			"	ldi %1, %%r20\n"			\
-			K_LDW_ASM_PIC					\
-			: "=r" (__res)					\
-			: "i" (SYS_ify(name)) K_ASM_ARGS_##nr   	\
-			: "memory", K_CALL_CLOB_REGS K_CLOB_ARGS_##nr	\
-		);							\
-		__sys_res = (long)__res;				\
-	}								\
-	if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){	\
-		errno = -__sys_res;		        		\
-		__sys_res = -1;						\
-	}								\
-	__sys_res;							\
-})
-
-#define K_LOAD_ARGS_0()
-#define K_LOAD_ARGS_1(r26)					\
-	register unsigned long __r26 __asm__("r26") = (unsigned long)(r26);   \
-	K_LOAD_ARGS_0()
-#define K_LOAD_ARGS_2(r26,r25)					\
-	register unsigned long __r25 __asm__("r25") = (unsigned long)(r25);   \
-	K_LOAD_ARGS_1(r26)
-#define K_LOAD_ARGS_3(r26,r25,r24)				\
-	register unsigned long __r24 __asm__("r24") = (unsigned long)(r24);   \
-	K_LOAD_ARGS_2(r26,r25)
-#define K_LOAD_ARGS_4(r26,r25,r24,r23)				\
-	register unsigned long __r23 __asm__("r23") = (unsigned long)(r23);   \
-	K_LOAD_ARGS_3(r26,r25,r24)
-#define K_LOAD_ARGS_5(r26,r25,r24,r23,r22)			\
-	register unsigned long __r22 __asm__("r22") = (unsigned long)(r22);   \
-	K_LOAD_ARGS_4(r26,r25,r24,r23)
-#define K_LOAD_ARGS_6(r26,r25,r24,r23,r22,r21)			\
-	register unsigned long __r21 __asm__("r21") = (unsigned long)(r21);   \
-	K_LOAD_ARGS_5(r26,r25,r24,r23,r22)
-
-/* Even with zero args we use r20 for the syscall number */
-#define K_ASM_ARGS_0
-#define K_ASM_ARGS_1 K_ASM_ARGS_0, "r" (__r26)
-#define K_ASM_ARGS_2 K_ASM_ARGS_1, "r" (__r25)
-#define K_ASM_ARGS_3 K_ASM_ARGS_2, "r" (__r24)
-#define K_ASM_ARGS_4 K_ASM_ARGS_3, "r" (__r23)
-#define K_ASM_ARGS_5 K_ASM_ARGS_4, "r" (__r22)
-#define K_ASM_ARGS_6 K_ASM_ARGS_5, "r" (__r21)
-
-/* The registers not listed as inputs but clobbered */
-#define K_CLOB_ARGS_6
-#define K_CLOB_ARGS_5 K_CLOB_ARGS_6, "%r21"
-#define K_CLOB_ARGS_4 K_CLOB_ARGS_5, "%r22"
-#define K_CLOB_ARGS_3 K_CLOB_ARGS_4, "%r23"
-#define K_CLOB_ARGS_2 K_CLOB_ARGS_3, "%r24"
-#define K_CLOB_ARGS_1 K_CLOB_ARGS_2, "%r25"
-#define K_CLOB_ARGS_0 K_CLOB_ARGS_1, "%r26"
-
-#define _syscall0(type,name)						\
-type name(void)								\
-{									\
-    return K_INLINE_SYSCALL(name, 0);	                                \
-}
-
-#define _syscall1(type,name,type1,arg1)					\
-type name(type1 arg1)							\
-{									\
-    return K_INLINE_SYSCALL(name, 1, arg1);	                        \
-}
-
-#define _syscall2(type,name,type1,arg1,type2,arg2)			\
-type name(type1 arg1, type2 arg2)					\
-{									\
-    return K_INLINE_SYSCALL(name, 2, arg1, arg2);	                \
-}
-
-#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)		\
-type name(type1 arg1, type2 arg2, type3 arg3)				\
-{									\
-    return K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3);	                \
-}
-
-#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4)		\
-{									\
-    return K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4);	        \
-}
-
-/* select takes 5 arguments */
-#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
-{									\
-    return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5);	\
-}
-
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_COMPAT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLD_GETRLIMIT
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
-
-#endif /* __ASSEMBLY__ */
-
-#undef STR
-
-/*
- * "Conditional" syscalls
- *
- * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
- * but it doesn't work on all toolchains, so we just do it by hand
- */
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_PARISC_UNISTD_H_ */
diff --git a/include/asm-parisc/unwind.h b/include/asm-parisc/unwind.h
deleted file mode 100644
index 2f7e6e50a158..000000000000
--- a/include/asm-parisc/unwind.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef _UNWIND_H_
-#define _UNWIND_H_
-
-#include <linux/list.h>
-
-/* From ABI specifications */
-struct unwind_table_entry {
-	unsigned int region_start;
-	unsigned int region_end;
-	unsigned int Cannot_unwind:1; /* 0 */
-	unsigned int Millicode:1;	/* 1 */
-	unsigned int Millicode_save_sr0:1;	/* 2 */
-	unsigned int Region_description:2;	/* 3..4 */
-	unsigned int reserved1:1;	/* 5 */
-	unsigned int Entry_SR:1;	/* 6 */
-	unsigned int Entry_FR:4;	/* number saved *//* 7..10 */
-	unsigned int Entry_GR:5;	/* number saved *//* 11..15 */
-	unsigned int Args_stored:1;	/* 16 */
-	unsigned int Variable_Frame:1;	/* 17 */
-	unsigned int Separate_Package_Body:1;	/* 18 */
-	unsigned int Frame_Extension_Millicode:1;	/* 19 */
-	unsigned int Stack_Overflow_Check:1;	/* 20 */
-	unsigned int Two_Instruction_SP_Increment:1;	/* 21 */
-	unsigned int Ada_Region:1;	/* 22 */
-	unsigned int cxx_info:1;	/* 23 */
-	unsigned int cxx_try_catch:1;	/* 24 */
-	unsigned int sched_entry_seq:1;	/* 25 */
-	unsigned int reserved2:1;	/* 26 */
-	unsigned int Save_SP:1;	/* 27 */
-	unsigned int Save_RP:1;	/* 28 */
-	unsigned int Save_MRP_in_frame:1;	/* 29 */
-	unsigned int extn_ptr_defined:1;	/* 30 */
-	unsigned int Cleanup_defined:1;	/* 31 */
-	
-	unsigned int MPE_XL_interrupt_marker:1;	/* 0 */
-	unsigned int HP_UX_interrupt_marker:1;	/* 1 */
-	unsigned int Large_frame:1;	/* 2 */
-	unsigned int Pseudo_SP_Set:1;	/* 3 */
-	unsigned int reserved4:1;	/* 4 */
-	unsigned int Total_frame_size:27;	/* 5..31 */
-};
-
-struct unwind_table {
-	struct list_head list;
-	const char *name;
-	unsigned long gp;
-	unsigned long base_addr;
-	unsigned long start;
-	unsigned long end;
-	const struct unwind_table_entry *table;
-	unsigned long length;
-};
-
-struct unwind_frame_info {
-	struct task_struct *t;
-	/* Eventually we would like to be able to get at any of the registers
-	   available; but for now we only try to get the sp and ip for each
-	   frame */
-	/* struct pt_regs regs; */
-	unsigned long sp, ip, rp, r31;
-	unsigned long prev_sp, prev_ip;
-};
-
-struct unwind_table *
-unwind_table_add(const char *name, unsigned long base_addr, 
-		 unsigned long gp, void *start, void *end);
-void
-unwind_table_remove(struct unwind_table *table);
-
-void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t, 
-		       struct pt_regs *regs);
-void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t);
-void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs);
-int unwind_once(struct unwind_frame_info *info);
-int unwind_to_user(struct unwind_frame_info *info);
-
-#endif
diff --git a/include/asm-parisc/user.h b/include/asm-parisc/user.h
deleted file mode 100644
index 80224753e508..000000000000
--- a/include/asm-parisc/user.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* This file should not exist, but lots of generic code still includes
-   it. It's a hangover from old a.out days and the traditional core
-   dump format.  We are ELF-only, and so are our core dumps.  If we
-   need to support HP/UX core format then we'll do it here
-   eventually. */
diff --git a/include/asm-parisc/vga.h b/include/asm-parisc/vga.h
deleted file mode 100644
index 171399a88ca6..000000000000
--- a/include/asm-parisc/vga.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_PARISC_VGA_H__
-#define __ASM_PARISC_VGA_H__
-
-/* nothing */
-
-#endif /* __ASM_PARISC_VGA_H__ */
diff --git a/include/asm-parisc/xor.h b/include/asm-parisc/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/include/asm-parisc/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
-- 
cgit v1.2.3-55-g7522


From 24b574d052a1996bac42fbd56715ab602092c291 Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Tue, 29 Jul 2008 00:09:22 -0400
Subject: parisc: add pdc_coproc_cfg_unlocked and set_firmware_width_unlocked

These functions are called only when bringing up the monarch cpu,
so it is safe to call them without taking the pdc spinlock. In the
future, this may become relevant for lockdep, since these functions were
taking spinlocks before start_kernel called the lockdep initializers.
---
 arch/parisc/include/asm/pdc.h |  2 ++
 arch/parisc/kernel/firmware.c | 65 +++++++++++++++++++++++++++++--------------
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 46b75f9cce51..c584b00c6074 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -603,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
 int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
 int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
 		  void *iodc_data, unsigned int iodc_data_size);
 int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -641,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
 #endif
 
 void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
 int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
 int pdc_do_reset(void);
 int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 99a9e505edf9..03f26bd75bd8 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
 #endif
 }
 
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+		__pa(pdc_result), 0);
+	convert_to_wide(pdc_result);
+	if (pdc_result[0] != NARROW_FIRMWARE)
+		parisc_narrow_firmware = 0;
+}
+	
 /**
  * set_firmware_width - Determine if the firmware is wide or narrow.
  * 
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
  */
 void __init set_firmware_width(void)
 {
-#ifdef CONFIG_64BIT
-	int retval;
 	unsigned long flags;
+	spin_lock_irqsave(&pdc_lock, flags);
+	set_firmware_width_unlocked();
+	spin_unlock_irqrestore(&pdc_lock, flags);
+}
+#else
+void __init set_firmware_width_unlocked(void) {
+	return;
+}
 
-        spin_lock_irqsave(&pdc_lock, flags);
-	retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
-	convert_to_wide(pdc_result);
-	if(pdc_result[0] != NARROW_FIRMWARE)
-		parisc_narrow_firmware = 0;
-        spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+void __init set_firmware_width(void) {
+	return;
 }
+#endif /*CONFIG_64BIT*/
 
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
 	return retval;
 }
 
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	pdc_coproc_info->ccr_functional = pdc_result[0];
+	pdc_coproc_info->ccr_present = pdc_result[1];
+	pdc_coproc_info->revision = pdc_result[17];
+	pdc_coproc_info->model = pdc_result[18];
+
+	return ret;
+}
+
 /**
  * pdc_coproc_cfg - To identify coprocessors attached to the processor.
  * @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
  */
 int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
 {
-        int retval;
+	int ret;
 	unsigned long flags;
 
-        spin_lock_irqsave(&pdc_lock, flags);
-        retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
-        convert_to_wide(pdc_result);
-        pdc_coproc_info->ccr_functional = pdc_result[0];
-        pdc_coproc_info->ccr_present = pdc_result[1];
-        pdc_coproc_info->revision = pdc_result[17];
-        pdc_coproc_info->model = pdc_result[18];
-        spin_unlock_irqrestore(&pdc_lock, flags);
+	spin_lock_irqsave(&pdc_lock, flags);
+	ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+	spin_unlock_irqrestore(&pdc_lock, flags);
 
-        return retval;
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3-55-g7522


From 089d55289db5d58d938d73b47a415b2b82ee19ac Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Tue, 29 Jul 2008 00:11:13 -0400
Subject: parisc: hijack jump to start_kernel

Bang in our own start_parisc call, which initializes the PDC
width, and turns on the FPU.

Previously, if CONFIG_PRINTK_TIME was on, we'd attempt to use
the FPU before we had enabled it, resulting in a difficult
to diagnose panic.

This patch causes init_per_cpu to redundantly set these for
cpu0, but this is harmless.
---
 arch/parisc/kernel/head.S  |  2 +-
 arch/parisc/kernel/setup.c | 27 ++++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index a84e31e82876..0e3d9f9b9e33 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -121,7 +121,7 @@ $pgt_fill_loop:
 	copy		%r0,%r2
 
 	/* And the RFI Target address too */
-	load32		start_kernel,%r11
+	load32		start_parisc,%r11
 
 	/* And the initial task pointer */
 	load32		init_thread_union,%r6
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 39e7c5a5946a..a59b71efdbe5 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -368,6 +368,31 @@ static int __init parisc_init(void)
 
 	return 0;
 }
-
 arch_initcall(parisc_init);
 
+void start_parisc(void)
+{
+	extern void start_kernel(void);
+
+	int ret, cpunum;
+	struct pdc_coproc_cfg coproc_cfg;
+
+	cpunum = smp_processor_id();
+
+	set_firmware_width_unlocked();
+
+	ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+	if (ret >= 0 && coproc_cfg.ccr_functional) {
+		mtctl(coproc_cfg.ccr_functional, 10);
+
+		cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+		cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+		asm volatile ("fstd	%fr0,8(%sp)");
+	} else {
+		panic("must have an fpu to boot linux");
+	}
+
+	start_kernel();
+	// not reached
+}
-- 
cgit v1.2.3-55-g7522


From 0be7d1fe4361bb9f2ebbd6fa394687cbe4bea950 Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Sat, 9 Aug 2008 14:38:18 -0400
Subject: parisc: add new syscalls

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/unistd.h   | 10 ++++++++--
 arch/parisc/kernel/syscall_table.S |  6 ++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index a7d857f0e4f4..ef26b009dc5d 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -801,8 +801,14 @@
 #define __NR_timerfd_create	(__NR_Linux + 306)
 #define __NR_timerfd_settime	(__NR_Linux + 307)
 #define __NR_timerfd_gettime	(__NR_Linux + 308)
-
-#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
+#define __NR_signalfd4		(__NR_Linux + 309)
+#define __NR_eventfd2		(__NR_Linux + 310)
+#define __NR_epoll_create1	(__NR_Linux + 311)
+#define __NR_dup3		(__NR_Linux + 312)
+#define __NR_pipe2		(__NR_Linux + 313)
+#define __NR_inotify_init1	(__NR_Linux + 314)
+
+#define __NR_Linux_syscalls	(__NR_inotify_init1 + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 6b5ac38f5a99..6084667eacf9 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -407,6 +407,12 @@
 	ENTRY_SAME(timerfd_create)
 	ENTRY_COMP(timerfd_settime)
 	ENTRY_COMP(timerfd_gettime)
+	ENTRY_COMP(signalfd4)
+	ENTRY_SAME(eventfd2)		/* 310 */
+	ENTRY_SAME(epoll_create1)
+	ENTRY_SAME(dup3)
+	ENTRY_SAME(pipe2)
+	ENTRY_SAME(inotify_init1)
 
 	/* Nothing yet */
 
-- 
cgit v1.2.3-55-g7522


From f0514ae323f19ba1ad4bea4174ea274c812f7eee Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Thu, 11 Sep 2008 10:17:23 -0400
Subject: parisc: initialize unwinder much earlier

The unwinder was being initialized way too late to be any use
debugging early boot crashes. Instead of relying on module_init
initcalls to initialize it, let's do it explicitly as early as
we can.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/unwind.h | 2 ++
 arch/parisc/kernel/setup.c       | 2 ++
 arch/parisc/kernel/unwind.c      | 4 +---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h
index 2f7e6e50a158..52482e4fc20d 100644
--- a/arch/parisc/include/asm/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
+int unwind_init(void);
+
 #endif
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index a59b71efdbe5..7d27853ff8c8 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/pdc_chassis.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_64BIT
 	extern int parisc_narrow_firmware;
 #endif
+	unwind_init();
 
 	init_per_cpu(smp_processor_id());	/* Set Modes & Enable FP */
 
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 701b2d2d8882..6773c582e457 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
 }
 
 /* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
 {
 	long start, stop;
 	register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
 
 	return ret;
 }
-
-module_init(unwind_init);
-- 
cgit v1.2.3-55-g7522


From 9eb1686423756f4dfb0ad8bfb02bb8bf1b89e50a Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Wed, 10 Sep 2008 14:24:07 +0000
Subject: parisc: add rtc platform driver

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/Kconfig       |   2 +
 arch/parisc/kernel/time.c |  20 ++++++++-
 drivers/rtc/Kconfig       |   8 ++++
 drivers/rtc/Makefile      |   1 +
 drivers/rtc/rtc-parisc.c  | 111 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 drivers/rtc/rtc-parisc.c

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index a7d4fd353c2b..0a8ac703dbec 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
 	def_bool y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select RTC_CLASS
+	select RTC_DRV_PARISC
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 24be86bba94d..4d09203bc693 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -23,6 +23,7 @@
 #include <linux/smp.h>
 #include <linux/profile.h>
 #include <linux/clocksource.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
 	cpu_data[cpu].it_value = next_tick;
 }
 
+struct platform_device rtc_parisc_dev = {
+	.name = "rtc-parisc",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
+{
+	int ret;
+
+	ret = platform_device_register(&rtc_parisc_dev);
+	if (ret < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
 void __init time_init(void)
 {
 	static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
 		xtime.tv_nsec = 0;
 	}
 }
-
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 9a9755c92fad..30d40fe194a8 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -575,6 +575,14 @@ config RTC_DRV_RS5C313
 	help
 	  If you say yes here you get support for the Ricoh RS5C313 RTC chips.
 
+config RTC_DRV_PARISC
+	tristate "PA-RISC firmware RTC support"
+	depends on PARISC
+	help
+	  Say Y or M here to enable RTC support on PA-RISC systems using
+	  firmware calls. If you do not know what you are doing, you should
+	  just say Y.
+
 config RTC_DRV_PPC
        tristate "PowerPC machine dependent RTC support"
        depends on PPC_MERGE
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 18622ef84cab..180ddacde730 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
+obj-$(CONFIG_RTC_DRV_PARISC)	+= rtc-parisc.o
 obj-$(CONFIG_RTC_DRV_PPC)	+= rtc-ppc.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
new file mode 100644
index 000000000000..346d633655e7
--- /dev/null
+++ b/drivers/rtc/rtc-parisc.c
@@ -0,0 +1,111 @@
+/* rtc-parisc: RTC for HP PA-RISC firmware
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+/* as simple as can be, and no simpler. */
+struct parisc_rtc {
+	struct rtc_device *rtc;
+	spinlock_t lock;
+};
+
+static int parisc_get_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *p = dev_get_drvdata(dev);
+	unsigned long flags, ret;
+
+	spin_lock_irqsave(&p->lock, flags);
+	ret = get_rtc_time(tm);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (ret & RTC_BATT_BAD)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int parisc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct parisc_rtc *p = dev_get_drvdata(dev);
+	unsigned long flags, ret;
+
+	spin_lock_irqsave(&p->lock, flags);
+	ret = set_rtc_time(tm);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (ret < 0)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static const struct rtc_class_ops parisc_rtc_ops = {
+	.read_time = parisc_get_time,
+	.set_time = parisc_set_time,
+};
+
+static int __devinit parisc_rtc_probe(struct platform_device *dev)
+{
+	struct parisc_rtc *p;
+
+	p = kzalloc(sizeof (*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	spin_lock_init(&p->lock);
+
+	p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
+					THIS_MODULE);
+	if (IS_ERR(p->rtc)) {
+		int err = PTR_ERR(p->rtc);
+		kfree(p);
+		return err;
+	}
+
+	platform_set_drvdata(dev, p);
+
+	return 0;
+}
+
+static int __devexit parisc_rtc_remove(struct platform_device *dev)
+{
+	struct parisc_rtc *p = platform_get_drvdata(dev);
+
+	rtc_device_unregister(p->rtc);
+	kfree(p);
+
+	return 0;
+}
+
+static struct platform_driver parisc_rtc_driver = {
+	.driver = {
+		.name = "rtc-parisc",
+		.owner = THIS_MODULE,
+	},
+	.probe = parisc_rtc_probe,
+	.remove = __devexit_p(parisc_rtc_remove),
+};
+
+static int __init parisc_rtc_init(void)
+{
+	return platform_driver_register(&parisc_rtc_driver);
+}
+
+static void __exit parisc_rtc_fini(void)
+{
+	platform_driver_unregister(&parisc_rtc_driver);
+}
+
+module_init(parisc_rtc_init);
+module_exit(parisc_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HP PA-RISC RTC driver");
-- 
cgit v1.2.3-55-g7522


From 97e1c18e8d17bd87e1e383b2e9d9fc740332c8e2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: tracing: Kernel Tracepoints

Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.

Taken from the documentation patch :

"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section).  When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).

You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."

Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".

We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.

Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
  tracepoint table. This will make sure not type mismatch happens due to
  connexion of a probe with the wrong type to a tracepoint declared with
  the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.

Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.

Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).

About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.

Quoting Hideo Aoki about Markers :

I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.

While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.

I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.

I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c

I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.

    The parameter of hackbench: every 50 from 50 to 800
    The number of CPUs of the server: 2, 4, and 8

Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.

Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.

* 2 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      4.811   |       4.872  |  +0.061  |  +1.27  |
      100 |      9.854   |      10.309  |  +0.454  |  +4.61  |
      150 |     15.602   |      15.040  |  -0.562  |  -3.6   |
      200 |     20.489   |      20.380  |  -0.109  |  -0.53  |
      250 |     25.798   |      25.652  |  -0.146  |  -0.56  |
      300 |     31.260   |      30.797  |  -0.463  |  -1.48  |
      350 |     36.121   |      35.770  |  -0.351  |  -0.97  |
      400 |     42.288   |      42.102  |  -0.186  |  -0.44  |
      450 |     47.778   |      47.253  |  -0.526  |  -1.1   |
      500 |     51.953   |      52.278  |  +0.325  |  +0.63  |
      550 |     58.401   |      57.700  |  -0.701  |  -1.2   |
      600 |     63.334   |      63.222  |  -0.112  |  -0.18  |
      650 |     68.816   |      68.511  |  -0.306  |  -0.44  |
      700 |     74.667   |      74.088  |  -0.579  |  -0.78  |
      750 |     78.612   |      79.582  |  +0.970  |  +1.23  |
      800 |     85.431   |      85.263  |  -0.168  |  -0.2   |
--------------------------------------------------------------

* 4 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      2.586   |       2.584  |  -0.003  |  -0.1   |
      100 |      5.254   |       5.283  |  +0.030  |  +0.56  |
      150 |      8.012   |       8.074  |  +0.061  |  +0.76  |
      200 |     11.172   |      11.000  |  -0.172  |  -1.54  |
      250 |     13.917   |      14.036  |  +0.119  |  +0.86  |
      300 |     16.905   |      16.543  |  -0.362  |  -2.14  |
      350 |     19.901   |      20.036  |  +0.135  |  +0.68  |
      400 |     22.908   |      23.094  |  +0.186  |  +0.81  |
      450 |     26.273   |      26.101  |  -0.172  |  -0.66  |
      500 |     29.554   |      29.092  |  -0.461  |  -1.56  |
      550 |     32.377   |      32.274  |  -0.103  |  -0.32  |
      600 |     35.855   |      35.322  |  -0.533  |  -1.49  |
      650 |     39.192   |      38.388  |  -0.804  |  -2.05  |
      700 |     41.744   |      41.719  |  -0.025  |  -0.06  |
      750 |     45.016   |      44.496  |  -0.520  |  -1.16  |
      800 |     48.212   |      47.603  |  -0.609  |  -1.26  |
--------------------------------------------------------------

* 8 CPUs

Number of | without      | with         | diff     | diff    |
processes | Marker [Sec] | Marker [Sec] |   [Sec]  |   [%]   |
--------------------------------------------------------------
       50 |      2.094   |       2.072  |  -0.022  |  -1.07  |
      100 |      4.162   |       4.273  |  +0.111  |  +2.66  |
      150 |      6.485   |       6.540  |  +0.055  |  +0.84  |
      200 |      8.556   |       8.478  |  -0.078  |  -0.91  |
      250 |     10.458   |      10.258  |  -0.200  |  -1.91  |
      300 |     12.425   |      12.750  |  +0.325  |  +2.62  |
      350 |     14.807   |      14.839  |  +0.032  |  +0.22  |
      400 |     16.801   |      16.959  |  +0.158  |  +0.94  |
      450 |     19.478   |      19.009  |  -0.470  |  -2.41  |
      500 |     21.296   |      21.504  |  +0.208  |  +0.98  |
      550 |     23.842   |      23.979  |  +0.137  |  +0.57  |
      600 |     26.309   |      26.111  |  -0.198  |  -0.75  |
      650 |     28.705   |      28.446  |  -0.259  |  -0.9   |
      700 |     31.233   |      31.394  |  +0.161  |  +0.52  |
      750 |     34.064   |      33.720  |  -0.344  |  -1.01  |
      800 |     36.320   |      36.114  |  -0.206  |  -0.57  |
--------------------------------------------------------------

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |   6 +-
 include/linux/module.h            |  17 ++
 include/linux/tracepoint.h        | 127 ++++++++++
 init/Kconfig                      |   7 +
 kernel/Makefile                   |   1 +
 kernel/module.c                   |  66 +++++-
 kernel/tracepoint.c               | 476 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 698 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/tracepoint.h
 create mode 100644 kernel/tracepoint.c

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7440a0dceddb..3d8e472a09c8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -52,7 +52,10 @@
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;
+	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	*(__tracepoints)						\
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -61,6 +64,7 @@
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
+		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
diff --git a/include/linux/module.h b/include/linux/module.h
index 68e09557c951..8b6113503863 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -331,6 +332,10 @@ struct module
 	struct marker *markers;
 	unsigned int num_markers;
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	struct tracepoint *tracepoints;
+	unsigned int num_tracepoints;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
@@ -454,6 +459,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -558,6 +566,15 @@ static inline void module_update_markers(void)
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644
index 000000000000..e623a6fca5c3
--- /dev/null
+++ b/include/linux/tracepoint.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+	const char *name;		/* Tracepoint name */
+	int state;			/* State. */
+	void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)	args
+#define TPARGS(args...)		args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non NULL.
+ */
+#define __DO_TRACE(tp, proto, args)					\
+	do {								\
+		void **it_func;						\
+									\
+		rcu_read_lock_sched();					\
+		it_func = rcu_dereference((tp)->funcs);			\
+		if (it_func) {						\
+			do {						\
+				((void(*)(proto))(*it_func))(args);	\
+			} while (*(++it_func));				\
+		}							\
+		rcu_read_unlock_sched();				\
+	} while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)					\
+	static inline void trace_##name(proto)				\
+	{								\
+		static const char __tpstrtab_##name[]			\
+		__attribute__((section("__tracepoints_strings")))	\
+		= #name ":" #proto;					\
+		static struct tracepoint __tracepoint_##name		\
+		__attribute__((section("__tracepoints"), aligned(8))) =	\
+		{ __tpstrtab_##name, 0, NULL };				\
+		if (unlikely(__tracepoint_##name.state))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TPPROTO(proto), TPARGS(args));		\
+	}								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return tracepoint_probe_register(#name ":" #proto,	\
+			(void *)probe);					\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{								\
+		tracepoint_probe_unregister(#name ":" #proto,		\
+			(void *)probe);					\
+	}
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)			\
+	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+	{ }								\
+	static inline void trace_##name(proto)				\
+	{ }								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return -ENOSYS;						\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{ }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+	struct module *module;
+	struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index c11da38837e5..70082678a914 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -771,6 +771,13 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+config TRACEPOINTS
+	bool "Activate tracepoints"
+	default y
+	help
+	  Place an empty function call at each tracepoint site. Can be
+	  dynamically changed for a probe function.
+
 config MARKERS
 	bool "Activate markers"
 	help
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..8f9ce7ec21b6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/module.c b/kernel/module.c
index 9db11911e04b..661d73db786e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,7 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
 
 #if 0
 #define DEBUGP printk
@@ -1831,6 +1832,8 @@ static noinline struct module *load_module(void __user *umod,
 #endif
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
+	unsigned int tracepointsindex;
+	unsigned int tracepointsstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2117,6 +2120,9 @@ static noinline struct module *load_module(void __user *umod,
 	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
  	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__markers_strings");
+	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__tracepoints_strings");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2144,6 +2150,12 @@ static noinline struct module *load_module(void __user *umod,
 	mod->num_markers =
 		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+	mod->num_tracepoints =
+		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2162,11 +2174,16 @@ static noinline struct module *load_module(void __user *umod,
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+	if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+		tracepoint_update_probe_range(mod->tracepoints,
+			mod->tracepoints + mod->num_tracepoints);
+#endif
+	}
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2717,3 +2734,50 @@ void module_update_markers(void)
 	mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			tracepoint_update_probe_range(mod->tracepoints,
+				mod->tracepoints + mod->num_tracepoints);
+	mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->tracepoint = NULL;
+			found = tracepoint_get_iter_range(&iter->tracepoint,
+				iter_mod->tracepoints,
+				iter_mod->tracepoints
+					+ iter_mod->num_tracepoints);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 000000000000..42e86ddbd2a0
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+	struct hlist_node hlist;
+	void **funcs;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
+	char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct tracepoint_entry *entry = container_of(head,
+		struct tracepoint_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+	if (!old)
+		return;
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+	synchronize_sched();	/* Until we have the call_rcu_sched() */
+#endif
+	call_rcu(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+	int i;
+
+	if (!tracepoint_debug)
+		return;
+
+	for (i = 0; entry->funcs[i]; i++)
+		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0;
+	void **old, **new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->funcs;
+	if (old) {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes]; nr_probes++)
+			if (old[nr_probes] == probe)
+				return ERR_PTR(-EEXIST);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (old)
+		memcpy(new, old, nr_probes * sizeof(void *));
+	new[nr_probes] = probe;
+	entry->refcount = nr_probes + 1;
+	entry->funcs = new;
+	debug_print_probes(entry);
+	return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0, nr_del = 0, i;
+	void **old, **new;
+
+	old = entry->funcs;
+
+	debug_print_probes(entry);
+	/* (N -> M), (N > 1, M >= 0) probes */
+	for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+		if ((!probe || old[nr_probes] == probe))
+			nr_del++;
+	}
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1) */
+		entry->funcs = NULL;
+		entry->refcount = 0;
+		debug_print_probes(entry);
+		return old;
+	} else {
+		int j = 0;
+		/* N -> M, (N > 1, M > 0) */
+		/* + 1 for NULL */
+		new = kzalloc((nr_probes - nr_del + 1)
+			* sizeof(void *), GFP_KERNEL);
+		if (new == NULL)
+			return ERR_PTR(-ENOMEM);
+		for (i = 0; old[i]; i++)
+			if ((probe && old[i] != probe))
+				new[j++] = old[i];
+		entry->refcount = nr_probes - nr_del;
+		entry->funcs = new;
+	}
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"tracepoint %s busy\n", name);
+			return ERR_PTR(-EEXIST);	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+	memcpy(&e->name[0], name, name_len);
+	e->funcs = NULL;
+	e->refcount = 0;
+	e->rcu_pending = 0;
+	hlist_add_head(&e->hlist, head);
+	return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * mutex_lock held.
+ */
+static int remove_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+	if (e->refcount)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier();
+	kfree(e);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+	struct tracepoint *elem, int active)
+{
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	/*
+	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+	 * probe callbacks array is consistent before setting a pointer to it.
+	 * This array is referenced by __DO_TRACE from
+	 * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
+	 * is used.
+	 */
+	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+	elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function insures that the original callback is not used anymore. This insured
+ * by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+	elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{
+	struct tracepoint *iter;
+	struct tracepoint_entry *mark_entry;
+
+	mutex_lock(&tracepoints_mutex);
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_tracepoint(iter->name);
+		if (mark_entry) {
+			set_tracepoint(&mark_entry, iter,
+					!!mark_entry->refcount);
+		} else {
+			disable_tracepoint(iter);
+		}
+	}
+	mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+	/* Core kernel tracepoints */
+	tracepoint_update_probe_range(__start___tracepoints,
+		__stop___tracepoints);
+	/* tracepoints in modules. */
+	module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	int ret = 0;
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry) {
+		entry = add_tracepoint(name);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
+	}
+	/*
+	 * If we detect that a call_rcu is pending for this tracepoint,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_add_probe(entry, probe);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
+		goto end;
+	}
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	WARN_ON(!entry);
+	tracepoint_entry_free_old(entry, old);
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+	int ret = -ENOENT;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_remove_probe(entry, probe);
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	tracepoint_entry_free_old(entry, old);
+	remove_tracepoint(name);	/* Ignore busy error message */
+	ret = 0;
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end)
+{
+	if (!*tracepoint && begin != end) {
+		*tracepoint = begin;
+		return 1;
+	}
+	if (*tracepoint >= begin && *tracepoint < end)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel tracepoints */
+	if (!iter->module) {
+		found = tracepoint_get_iter_range(&iter->tracepoint,
+				__start___tracepoints, __stop___tracepoints);
+		if (found)
+			goto end;
+	}
+	/* tracepoints in modules. */
+	found = module_get_iter_tracepoints(iter);
+end:
+	if (!found)
+		tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+	iter->tracepoint++;
+	/*
+	 * iter->tracepoint may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the tracepoints, getting the
+	 * tracepoints from following modules if necessary.
+	 */
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+	iter->module = NULL;
+	iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
-- 
cgit v1.2.3-55-g7522


From 24b8d831d56aac7907752d22d2aba5d8127db6f6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: tracing: tracepoints, documentation

Documentation of tracepoint usage.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/tracepoints.txt | 101 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 Documentation/tracepoints.txt

diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644
index 000000000000..5d354e167494
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+	             Using the Linux Kernel Tracepoints
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+	TPPTOTO(int firstarg, struct task_struct *p),
+	TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+	...
+	trace_subsys_eventname(arg, task);
+	...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname().  Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe sure there is no
+caller left using the probe when it returns. Probe removal is preempt-safe
+because preemption is disabled around the probe call. See the "Probe example"
+section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
-- 
cgit v1.2.3-55-g7522


From 4a0897526bbc5c6ac0df80b16b8c60339e717ae2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 18 Jul 2008 12:16:16 -0400
Subject: tracing: tracepoints, samples

Tracepoint example code under samples/.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 samples/Kconfig                                |  6 +++
 samples/Makefile                               |  2 +-
 samples/tracepoints/Makefile                   |  6 +++
 samples/tracepoints/tp-samples-trace.h         | 13 ++++++
 samples/tracepoints/tracepoint-probe-sample.c  | 55 ++++++++++++++++++++++++++
 samples/tracepoints/tracepoint-probe-sample2.c | 42 ++++++++++++++++++++
 samples/tracepoints/tracepoint-sample.c        | 53 +++++++++++++++++++++++++
 7 files changed, 176 insertions(+), 1 deletion(-)
 create mode 100644 samples/tracepoints/Makefile
 create mode 100644 samples/tracepoints/tp-samples-trace.h
 create mode 100644 samples/tracepoints/tracepoint-probe-sample.c
 create mode 100644 samples/tracepoints/tracepoint-probe-sample2.c
 create mode 100644 samples/tracepoints/tracepoint-sample.c

diff --git a/samples/Kconfig b/samples/Kconfig
index e1fb471cc501..4b02f5a0e656 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
 	help
 	  This build markers example modules.
 
+config SAMPLE_TRACEPOINTS
+	tristate "Build tracepoints examples -- loadable modules only"
+	depends on TRACEPOINTS && m
+	help
+	  This build tracepoints example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 2e02575f7794..10eaca89fe17 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644
index 000000000000..36479ad9ae14
--- /dev/null
+++ b/samples/tracepoints/Makefile
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644
index 000000000000..0216b55bd640
--- /dev/null
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -0,0 +1,13 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>	/* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+	TPPROTO(struct inode *inode, struct file *file),
+	TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+	TPPROTO(void),
+	TPARGS());
+#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644
index 000000000000..55abfdda4bd4
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -0,0 +1,55 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	path_get(&file->f_path);
+	dget(file->f_path.dentry);
+	printk(KERN_INFO "Event is encountered with filename %s\n",
+		file->f_path.dentry->d_name.name);
+	dput(file->f_path.dentry);
+	path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+	printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+	ret = register_trace_subsys_eventb(probe_subsys_eventb);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_eventb(probe_subsys_eventb);
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644
index 000000000000..5e9fcf4afffe
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -0,0 +1,42 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+	printk(KERN_INFO "Event is encountered with inode number %lu\n",
+		inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644
index 000000000000..4ae4b7fcc043
--- /dev/null
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -0,0 +1,53 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_subsys_event(inode, file);
+	for (i = 0; i < 10; i++)
+		trace_subsys_eventb();
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = proc_create("tracepoint-example", 0444, NULL,
+		&mark_ops);
+	if (!pentry_example)
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");
-- 
cgit v1.2.3-55-g7522


From 0a16b6075843325dc402edf80c1662838b929aff Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 18 Jul 2008 12:16:17 -0400
Subject: tracing, sched: LTTng instrumentation - scheduler

Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Actually, kthread
creation is not instrumented in this patch because it is architecture
dependent. It allows to connect tracers such as ftrace which detects
scheduling latencies, good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.

About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.

Changelog :

- Change instrumentation location and parameter to match ftrace
  instrumentation, previously done with kernel markers.

[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/exit.c         | 10 +++++++++-
 kernel/fork.c         |  3 +++
 kernel/kthread.c      |  5 +++++
 kernel/sched.c        | 17 ++++++-----------
 kernel/signal.c       |  3 +++
 6 files changed, 71 insertions(+), 12 deletions(-)
 create mode 100644 include/trace/sched.h

diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 000000000000..506ae1323656
--- /dev/null
+++ b/include/trace/sched.h
@@ -0,0 +1,45 @@
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+	TPPROTO(struct task_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+	TPPROTO(int ret),
+	TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup_new,
+	TPPROTO(struct rq *rq, struct task_struct *p),
+	TPARGS(rq, p));
+DEFINE_TRACE(sched_switch,
+	TPPROTO(struct rq *rq, struct task_struct *prev,
+		struct task_struct *next),
+	TPARGS(rq, prev, next));
+DEFINE_TRACE(sched_migrate_task,
+	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+	TPARGS(rq, p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+	TPPROTO(struct pid *pid),
+	TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+	TPPROTO(struct task_struct *parent, struct task_struct *child),
+	TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+	TPPROTO(int sig, struct task_struct *p),
+	TPARGS(sig, p));
+
+#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index 85a83c831856..7b71f87f1207 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -149,7 +150,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-	put_task_struct(container_of(rhp, struct task_struct, rcu));
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	trace_sched_process_free(tsk);
+	put_task_struct(tsk);
 }
 
 
@@ -1074,6 +1078,8 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_sched_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
@@ -1675,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
 	struct task_struct *tsk;
 	int retval;
 
+	trace_sched_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 30de644a40c4..cfaff92f61ff 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1364,6 +1365,8 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		trace_sched_process_fork(current, p);
+
 		nr = task_pid_vnr(p);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..50598e29439a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <trace/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k)
 	/* It could exit after stop_info.k set, but before wake_up_process. */
 	get_task_struct(k);
 
+	trace_sched_kthread_stop(k);
+
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
 	smp_wmb();
@@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k)
 	ret = kthread_stop_info.err;
 	mutex_unlock(&kthread_stop_lock);
 
+	trace_sched_kthread_stop_ret(ret);
+
 	return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0c..3d1ad130c24e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		 * just go back and repeat.
 		 */
 		rq = task_rq_lock(p, &flags);
+		trace_sched_wait_task(rq, p);
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
 		ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
 	success = 1;
 
 out_running:
-	trace_mark(kernel_sched_wakeup,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup(rq, p);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		p->sched_class->task_new(rq, p);
 		inc_nr_running(rq);
 	}
-	trace_mark(kernel_sched_wakeup_new,
-		"pid %d state %ld ## rq %p task %p rq->curr %p",
-		p->pid, p->state, rq, p, rq->curr);
+	trace_sched_wakeup_new(rq, p);
 	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	struct mm_struct *mm, *oldmm;
 
 	prepare_task_switch(rq, prev, next);
-	trace_mark(kernel_sched_schedule,
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		prev->pid, next->pid, prev->state,
-		rq, prev, next);
+	trace_sched_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
+	trace_sched_migrate_task(rq, p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..bf40ecc87b26 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <trace/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	struct sigpending *pending;
 	struct sigqueue *q;
 
+	trace_sched_signal_send(sig, t);
+
 	assert_spin_locked(&t->sighand->siglock);
 	if (!prepare_signal(sig, t))
 		return 0;
-- 
cgit v1.2.3-55-g7522


From b07c3f193a8074aa4afe43cfa8ae38ec4c7ccfa9 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 18 Jul 2008 12:16:17 -0400
Subject: ftrace: port to tracepoints

Porting the trace_mark() used by ftrace to tracepoints. (cleanup)

Changelog :
- Change error messages : marker -> tracepoint

[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_sched_switch.c | 120 +++++++--------------------------
 kernel/trace/trace_sched_wakeup.c | 135 ++++++++++----------------------------
 2 files changed, 58 insertions(+), 197 deletions(-)

diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..789e927abc9c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -19,16 +19,17 @@ static int __read_mostly	tracer_enabled;
 static atomic_t			sched_ref;
 
 static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 			struct task_struct *next)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
+	if (!atomic_read(&sched_ref))
+		return;
+
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
 
@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
+	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_switch_trace(tr, data, prev, next, flags);
+		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	if (!atomic_read(&sched_ref))
-		return;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	sched_switch_func(probe_data, __rq, prev, next);
-}
-
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-			task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
-	if (!tracer_enabled)
+	if (!likely(tracer_enabled))
 		return;
 
-	tracing_record_cmdline(curr);
+	tracing_record_cmdline(current);
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
+	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+			flags);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_func(probe_data, __rq, task, curr);
-}
-
 static void sched_switch_reset(struct trace_array *tr)
 {
 	int cpu;
@@ -140,60 +88,40 @@ static int tracing_sched_register(void)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return ret;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&ctx_trace);
+	ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&ctx_trace);
+	ret = register_trace_sched_switch(probe_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
 
 	return ret;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 	return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&ctx_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&ctx_trace);
+	unregister_trace_sched_switch(probe_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+	unregister_trace_sched_wakeup(probe_sched_wakeup);
 }
 
 static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..08206b4e29c4 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -112,18 +112,18 @@ static int report_latency(cycle_t delta)
 }
 
 static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	struct task_struct *next)
 {
 	unsigned long latency = 0, t0 = 0, t1 = 0;
-	struct trace_array **ptr = private;
-	struct trace_array *tr = *ptr;
 	struct trace_array_cpu *data;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
 	long disabled;
 	int cpu;
 
+	tracing_record_cmdline(prev);
+
 	if (unlikely(!tracer_enabled))
 		return;
 
@@ -140,11 +140,11 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 		return;
 
 	/* The task we are waiting for is waking up */
-	data = tr->data[wakeup_cpu];
+	data = wakeup_trace->data[wakeup_cpu];
 
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (likely(disabled != 1))
 		goto out;
 
@@ -155,7 +155,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +174,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	t0 = nsecs_to_usecs(T0);
 	t1 = nsecs_to_usecs(T1);
 
-	update_max_tr(tr, wakeup_task, wakeup_cpu);
+	update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 
 out_unlock:
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 	__raw_spin_unlock(&wakeup_lock);
 	local_irq_restore(flags);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-		      const char *format, va_list *args)
-{
-	struct task_struct *prev;
-	struct task_struct *next;
-	struct rq *__rq;
-
-	/* skip prev_pid %d next_pid %d prev_state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	__rq = va_arg(*args, typeof(__rq));
-	prev = va_arg(*args, typeof(prev));
-	next = va_arg(*args, typeof(next));
-
-	tracing_record_cmdline(prev);
-
-	/*
-	 * If tracer_switch_func only points to the local
-	 * switch func, it still needs the ptr passed to it.
-	 */
-	wakeup_sched_switch(probe_data, __rq, prev, next);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void __wakeup_reset(struct trace_array *tr)
@@ -240,19 +215,24 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
-		   struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	long disabled;
 
+	if (likely(!tracer_enabled))
+		return;
+
+	tracing_record_cmdline(p);
+	tracing_record_cmdline(current);
+
 	if (likely(!rt_task(p)) ||
 			p->prio >= wakeup_prio ||
-			p->prio >= curr->prio)
+			p->prio >= current->prio)
 		return;
 
-	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (unlikely(disabled != 1))
 		goto out;
 
@@ -264,7 +244,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 		goto out_locked;
 
 	/* reset the trace */
-	__wakeup_reset(tr);
+	__wakeup_reset(wakeup_trace);
 
 	wakeup_cpu = task_cpu(p);
 	wakeup_prio = p->prio;
@@ -274,74 +254,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
 	local_save_flags(flags);
 
-	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
-	trace_function(tr, tr->data[wakeup_cpu],
+	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
 		       CALLER_ADDR1, CALLER_ADDR2, flags);
 
 out_locked:
 	__raw_spin_unlock(&wakeup_lock);
 out:
-	atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-		 const char *format, va_list *args)
-{
-	struct trace_array **ptr = probe_data;
-	struct trace_array *tr = *ptr;
-	struct task_struct *curr;
-	struct task_struct *task;
-	struct rq *__rq;
-
-	if (likely(!tracer_enabled))
-		return;
-
-	/* Skip pid %d state %ld */
-	(void)va_arg(*args, int);
-	(void)va_arg(*args, long);
-	/* now get the meat: "rq %p task %p rq->curr %p" */
-	__rq = va_arg(*args, typeof(__rq));
-	task = va_arg(*args, typeof(task));
-	curr = va_arg(*args, typeof(curr));
-
-	tracing_record_cmdline(task);
-	tracing_record_cmdline(curr);
-
-	wakeup_check_start(tr, task, curr);
+	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void start_wakeup_tracer(struct trace_array *tr)
 {
 	int ret;
 
-	ret = marker_probe_register("kernel_sched_wakeup",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup\n");
 		return;
 	}
 
-	ret = marker_probe_register("kernel_sched_wakeup_new",
-			"pid %d state %ld ## rq %p task %p rq->curr %p",
-			wake_up_callback,
-			&wakeup_trace);
+	ret = register_trace_sched_wakeup_new(probe_wakeup);
 	if (ret) {
-		pr_info("wakeup trace: Couldn't add marker"
+		pr_info("wakeup trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_wakeup_new\n");
 		goto fail_deprobe;
 	}
 
-	ret = marker_probe_register("kernel_sched_schedule",
-		"prev_pid %d next_pid %d prev_state %ld "
-		"## rq %p prev %p next %p",
-		sched_switch_callback,
-		&wakeup_trace);
+	ret = register_trace_sched_switch(probe_wakeup_sched_switch);
 	if (ret) {
-		pr_info("sched trace: Couldn't add marker"
+		pr_info("sched trace: Couldn't activate tracepoint"
 			" probe to kernel_sched_schedule\n");
 		goto fail_deprobe_wake_new;
 	}
@@ -363,28 +306,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
 
 	return;
 fail_deprobe_wake_new:
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
 fail_deprobe:
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
 	unregister_ftrace_function(&trace_ops);
-	marker_probe_unregister("kernel_sched_schedule",
-				sched_switch_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup_new",
-				wake_up_callback,
-				&wakeup_trace);
-	marker_probe_unregister("kernel_sched_wakeup",
-				wake_up_callback,
-				&wakeup_trace);
+	unregister_trace_sched_switch(probe_wakeup_sched_switch);
+	unregister_trace_sched_wakeup_new(probe_wakeup);
+	unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void wakeup_tracer_init(struct trace_array *tr)
-- 
cgit v1.2.3-55-g7522


From fa340d9c050e78fb21a142b617304214ae5e0c2d Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 23 Jul 2008 13:38:00 +0200
Subject: tracing: disable tracepoints by default

while it's arguably low overhead, we dont enable new features by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 70082678a914..d5994490b0b0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -773,7 +773,6 @@ config PROFILING
 
 config TRACEPOINTS
 	bool "Activate tracepoints"
-	default y
 	help
 	  Place an empty function call at each tracepoint site. Can be
 	  dynamically changed for a probe function.
-- 
cgit v1.2.3-55-g7522


From cf569a932217b97e2fc2c48aa597fe29519a0cff Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 23 Jul 2008 13:48:22 +0200
Subject: sched: clean up tracepoints

make it a bit more structured hence more readable.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/trace/sched.h b/include/trace/sched.h
index 506ae1323656..ad47369d01b5 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -6,40 +6,51 @@
 
 DEFINE_TRACE(sched_kthread_stop,
 	TPPROTO(struct task_struct *t),
-	TPARGS(t));
+		TPARGS(t));
+
 DEFINE_TRACE(sched_kthread_stop_ret,
 	TPPROTO(int ret),
-	TPARGS(ret));
+		TPARGS(ret));
+
 DEFINE_TRACE(sched_wait_task,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_wakeup,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_wakeup_new,
 	TPPROTO(struct rq *rq, struct task_struct *p),
-	TPARGS(rq, p));
+		TPARGS(rq, p));
+
 DEFINE_TRACE(sched_switch,
 	TPPROTO(struct rq *rq, struct task_struct *prev,
 		struct task_struct *next),
-	TPARGS(rq, prev, next));
+		TPARGS(rq, prev, next));
+
 DEFINE_TRACE(sched_migrate_task,
 	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
-	TPARGS(rq, p, dest_cpu));
+		TPARGS(rq, p, dest_cpu));
+
 DEFINE_TRACE(sched_process_free,
 	TPPROTO(struct task_struct *p),
-	TPARGS(p));
+		TPARGS(p));
+
 DEFINE_TRACE(sched_process_exit,
 	TPPROTO(struct task_struct *p),
-	TPARGS(p));
+		TPARGS(p));
+
 DEFINE_TRACE(sched_process_wait,
 	TPPROTO(struct pid *pid),
-	TPARGS(pid));
+		TPARGS(pid));
+
 DEFINE_TRACE(sched_process_fork,
 	TPPROTO(struct task_struct *parent, struct task_struct *child),
-	TPARGS(parent, child));
+		TPARGS(parent, child));
+
 DEFINE_TRACE(sched_signal_send,
 	TPPROTO(int sig, struct task_struct *p),
-	TPARGS(sig, p));
+		TPARGS(sig, p));
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 5f87f1121895dc09d2d1c1db5f14af6aa4ce3e94 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 23 Jul 2008 14:15:22 +0200
Subject: tracing: clean up tracepoints kconfig structure

do not expose users to CONFIG_TRACEPOINTS - tracers can select it
just fine.

update ftrace to select CONFIG_TRACEPOINTS.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Kconfig         | 9 +++++----
 kernel/trace/Kconfig | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index d5994490b0b0..031344f954fd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -771,11 +771,12 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
 config TRACEPOINTS
-	bool "Activate tracepoints"
-	help
-	  Place an empty function call at each tracepoint site. Can be
-	  dynamically changed for a probe function.
+	bool
 
 config MARKERS
 	bool "Activate markers"
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..cae2637d5e68 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -14,6 +14,7 @@ config TRACING
 	bool
 	select DEBUG_FS
 	select STACKTRACE
+	select TRACEPOINTS
 
 config FTRACE
 	bool "Kernel Function Tracer"
-- 
cgit v1.2.3-55-g7522


From 611b1597680dd4a57896bcca1af0484be463c55e Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Mon, 21 Jul 2008 18:49:56 +0300
Subject: x86: fix mmiotrace 8-bit register decoding

When SIL, DIL, BPL or SPL registers were used in MMIO, the datum
was extracted from AH, BH, CH, or DH, which are incorrect.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Cc: "Vegard Nossum" <vegard.nossum@gmail.com>
Cc: "Steven Rostedt" <srostedt@redhat.com>
Cc: proski@gnu.org
Cc: "Pekka Enberg"
	<penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pf_in.c | 121 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 84 insertions(+), 37 deletions(-)

diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index efa1911e20ca..df3d5c861cda 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
 static unsigned int mw64[] = { 0x89, 0x8B };
 #endif /* not __i386__ */
 
-static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
-								int *rexr)
+struct prefix_bits {
+	unsigned shorted:1;
+	unsigned enlarged:1;
+	unsigned rexr:1;
+	unsigned rex:1;
+};
+
+static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
 {
 	int i;
 	unsigned char *p = addr;
-	*shorted = 0;
-	*enlarged = 0;
-	*rexr = 0;
+	prf->shorted = 0;
+	prf->enlarged = 0;
+	prf->rexr = 0;
+	prf->rex = 0;
 
 restart:
 	for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
 		if (*p == prefix_codes[i]) {
 			if (*p == 0x66)
-				*shorted = 1;
+				prf->shorted = 1;
 #ifdef __amd64__
 			if ((*p & 0xf8) == 0x48)
-				*enlarged = 1;
+				prf->enlarged = 1;
 			if ((*p & 0xf4) == 0x44)
-				*rexr = 1;
+				prf->rexr = 1;
+			if ((*p & 0xf0) == 0x40)
+				prf->rex = 1;
 #endif
 			p++;
 			goto restart;
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int shorted, enlarged, rexr;
+	struct prefix_bits prf;
 	int i;
 	enum reason_type rv = OTHERS;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(rw32); i++)
 		if (rw32[i] == opcode)
-			return (shorted ? 2 : (enlarged ? 8 : 4));
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 {
 	unsigned int opcode;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
 	for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 
 	for (i = 0; i < ARRAY_SIZE(mw32); i++)
 		if (mw32[i] == opcode)
-			return shorted ? 2 : 4;
+			return prf.shorted ? 2 : 4;
 
 	for (i = 0; i < ARRAY_SIZE(mw64); i++)
 		if (mw64[i] == opcode)
-			return shorted ? 2 : (enlarged ? 8 : 4);
+			return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
 	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
 	return 0;
@@ -238,7 +249,7 @@ enum {
 #endif
 };
 
-static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
 {
 	unsigned char *rv = NULL;
 
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 	case arg_DL:
 		rv = (unsigned char *)&regs->dx;
 		break;
-	case arg_AH:
-		rv = 1 + (unsigned char *)&regs->ax;
-		break;
-	case arg_BH:
-		rv = 1 + (unsigned char *)&regs->bx;
-		break;
-	case arg_CH:
-		rv = 1 + (unsigned char *)&regs->cx;
-		break;
-	case arg_DH:
-		rv = 1 + (unsigned char *)&regs->dx;
-		break;
 #ifdef __amd64__
 	case arg_R8:
 		rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
 		break;
 #endif
 	default:
-		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
 		break;
 	}
+
+	if (rv)
+		return rv;
+
+	if (rex) {
+		/*
+		 * If REX prefix exists, access low bytes of SI etc.
+		 * instead of AH etc.
+		 */
+		switch (no) {
+		case arg_SI:
+			rv = (unsigned char *)&regs->si;
+			break;
+		case arg_DI:
+			rv = (unsigned char *)&regs->di;
+			break;
+		case arg_BP:
+			rv = (unsigned char *)&regs->bp;
+			break;
+		case arg_SP:
+			rv = (unsigned char *)&regs->sp;
+			break;
+		default:
+			break;
+		}
+	} else {
+		switch (no) {
+		case arg_AH:
+			rv = 1 + (unsigned char *)&regs->ax;
+			break;
+		case arg_BH:
+			rv = 1 + (unsigned char *)&regs->bx;
+			break;
+		case arg_CH:
+			rv = 1 + (unsigned char *)&regs->cx;
+			break;
+		case arg_DH:
+			rv = 1 + (unsigned char *)&regs->dx;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (!rv)
+		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+
 	return rv;
 }
 
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 	unsigned char mod_rm;
 	int reg;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
 		if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 
 do_work:
 	mod_rm = *p;
-	reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+	reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
 	switch (get_ins_reg_width(ins_addr)) {
 	case 1:
-		return *get_reg_w8(reg, regs);
+		return *get_reg_w8(reg, prf.rex, regs);
 
 	case 2:
 		return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
 	unsigned char mod_rm;
 	unsigned char mod;
 	unsigned char *p;
-	int i, shorted, enlarged, rexr;
+	struct prefix_bits prf;
+	int i;
 	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
-	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
 		if (imm_wop[i] == opcode) {
-- 
cgit v1.2.3-55-g7522


From 9795302acf2817d0842e56d23df6008e43df0970 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Thu, 24 Jul 2008 16:37:23 -0400
Subject: tracepoints: use TABLE_SIZE macro

Steven Rostedt suggested:

| Wouldn't it look nicer to have: (TRACEPOINT_TABLE_SIZE - 1) ?

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 42e86ddbd2a0..c7c62a4a75f5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -177,7 +177,7 @@ static struct tracepoint_entry *get_tracepoint(const char *name)
 	struct tracepoint_entry *e;
 	u32 hash = jhash(name, strlen(name), 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name))
 			return e;
@@ -197,7 +197,7 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
 	size_t name_len = strlen(name) + 1;
 	u32 hash = jhash(name, name_len-1, 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			printk(KERN_NOTICE
@@ -233,7 +233,7 @@ static int remove_tracepoint(const char *name)
 	size_t len = strlen(name) + 1;
 	u32 hash = jhash(name, len-1, 0);
 
-	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			found = 1;
-- 
cgit v1.2.3-55-g7522


From 36dcd67ae994fece615b7c700958d215e884b9ae Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 29 Jul 2008 12:00:59 +0200
Subject: ftrace: ignore functions that cannot be kprobe-ed

kprobes already has an extensive list of annotations for functions
that should not be instrumented. Add notrace annotations to these
functions as well.

This is particularly useful for functions called by the NMI path.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kprobes.h | 5 +++--
 kernel/notifier.c       | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0be7795655fa..497b1d1f7a05 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,6 +29,7 @@
  *		<jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> added function-return probes.
  */
+#include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -47,7 +48,7 @@
 #define KPROBE_HIT_SSDONE	0x00000008
 
 /* Attach to insert probes on any functions which should be ignored*/
-#define __kprobes	__attribute__((__section__(".kprobes.text")))
+#define __kprobes	__attribute__((__section__(".kprobes.text"))) notrace
 
 struct kprobe;
 struct pt_regs;
@@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 
 #else /* CONFIG_KPROBES */
 
-#define __kprobes	/**/
+#define __kprobes	notrace
 struct jprobe;
 struct kretprobe;
 
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584ef..4282c0a40a57 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
-- 
cgit v1.2.3-55-g7522


From 8b1fa1d7b22f386747c7b78b918d4c680c16066f Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 29 Jul 2008 12:36:02 +0200
Subject: ftrace: mark lapic_wd_event() notrace

it can be called in the NMI path:

[    0.645999] calling  ftrace_dynamic_init+0x0/0xd6
[    0.647521] ------------[ cut here ]------------
[    0.647521] WARNING: at kernel/trace/ftrace.c:348 ftrace_record_ip+0x4e/0x252()
[    0.647521] Modules linked in:
[    0.647521] Pid: 15, comm: kstop1 Not tainted 2.6.27-rc1-tip #22686
[    0.647521]
[    0.647521] Call Trace:
[    0.647521]  <NMI>  [<ffffffff8024593f>] warn_on_slowpath+0x5d/0x84
[    0.647521]  [<ffffffff80220b99>] ? lapic_wd_event+0xb/0x5c
[    0.647521]  [<ffffffff80287b3b>] ftrace_record_ip+0x4e/0x252
[    0.647521]  [<ffffffff80211274>] mcount_call+0x5/0x31
[    0.647521]  [<ffffffff80220b9e>] ? lapic_wd_event+0x10/0x5c
[    0.647521]  [<ffffffff8083f3ec>] nmi_watchdog_tick+0x19d/0x1ad
[    0.647521]  [<ffffffff8083e875>] default_do_nmi+0x75/0x1e3
[    0.647521]  [<ffffffff8083f0b3>] do_nmi+0x5d/0x94
[    0.647521]  [<ffffffff8083e2d2>] nmi+0xa2/0xc2
[    0.647521]  [<ffffffff802b48c3>] ? check_bytes_and_report+0x11/0xcc
[    0.647521]  <<EOE>>  [<ffffffff80211274>] ? mcount_call+0x5/0x31
[    0.647521]  [<ffffffff802b49df>] check_object+0x61/0x1b0
[    0.647521]  [<ffffffff802b502a>] __slab_free+0x169/0x2ae
[    0.647521]  [<ffffffff80242dbf>] ? __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff80242dbf>] ? __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff802b60cd>] kmem_cache_free+0x85/0xb9
[    0.647521]  [<ffffffff80242dbf>] __cleanup_sighand+0x25/0x27
[    0.647521]  [<ffffffff80247b3d>] release_task+0x256/0x339
[    0.647521]  [<ffffffff802490b4>] do_exit+0x764/0x7ef
[    0.647521]  [<ffffffff8027624c>] __xchg+0x0/0x38
[    0.647521]  [<ffffffff8027619a>] ? stop_cpu+0x0/0xb2
[    0.647521]  [<ffffffff8027619a>] ? stop_cpu+0x0/0xb2
[    0.647521]  [<ffffffff8025922f>] kthread+0x4e/0x7b
[    0.647521]  [<ffffffff80212979>] child_rip+0xa/0x11
[    0.647521]  [<ffffffff80211c17>] ? restore_args+0x0/0x30
[    0.647521]  [<ffffffff802283a5>] ? native_load_tls+0x14/0x2e
[    0.647521]  [<ffffffff802591e1>] ? kthread+0x0/0x7b
[    0.647521]  [<ffffffff8021296f>] ? child_rip+0x0/0x11
[    0.647521]
[    0.647521] ---[ end trace 4eaa2a86a8e2da22 ]---
[    0.672032] initcall ftrace_dynamic_init+0x0/0xd6 returned 0 after 19 msecs

also mark it no-kprobes while at it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perfctr-watchdog.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..9abd48b22674 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
 #include <asm/intel_arch_perfmon.h>
 
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
 	release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/* start the cycle over again */
 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
 	return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	/*
 	 * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
 	unsigned dummy;
 	/*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
 	return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 ctr;
-- 
cgit v1.2.3-55-g7522


From 8da3821ba5634497da63d58a69e24a97697c4a2b Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:07 -0400
Subject: ftrace: create __mcount_loc section

This patch creates a section in the kernel called "__mcount_loc".
This will hold a list of pointers to the mcount relocation for
each call site of mcount.

For example:

objdump -dr init/main.o
[...]
Disassembly of section .text:

0000000000000000 <do_one_initcall>:
   0:   55                      push   %rbp
[...]
000000000000017b <init_post>:
 17b:   55                      push   %rbp
 17c:   48 89 e5                mov    %rsp,%rbp
 17f:   53                      push   %rbx
 180:   48 83 ec 08             sub    $0x8,%rsp
 184:   e8 00 00 00 00          callq  189 <init_post+0xe>
                        185: R_X86_64_PC32      mcount+0xfffffffffffffffc
[...]

We will add a section to point to each function call.

   .section __mcount_loc,"a",@progbits
[...]
   .quad .text + 0x185
[...]

The offset to of the mcount call site in init_post is an offset from
the start of the section, and not the start of the function init_post.
The mcount relocation is at the call site 0x185 from the start of the
.text section.

  .text + 0x185  == init_post + 0xa

We need a way to add this __mcount_loc section in a way that we do not
lose the relocations after final link.  The .text section here will
be attached to all other .text sections after final link and the
offsets will be meaningless.  We need to keep track of where these
.text sections are.

To do this, we use the start of the first function in the section.
do_one_initcall.  We can make a tmp.s file with this function as a reference
to the start of the .text section.

   .section __mcount_loc,"a",@progbits
[...]
   .quad do_one_initcall + 0x185
[...]

Then we can compile the tmp.s into a tmp.o

  gcc -c tmp.s -o tmp.o

And link it into back into main.o.

  ld -r main.o tmp.o -o tmp_main.o
  mv tmp_main.o main.o

But we have a problem.  What happens if the first function in a section
is not exported, and is a static function. The linker will not let
the tmp.o use it.  This case exists in main.o as well.

Disassembly of section .init.text:

0000000000000000 <set_reset_devices>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   e8 00 00 00 00          callq  9 <set_reset_devices+0x9>
                        5: R_X86_64_PC32        mcount+0xfffffffffffffffc

The first function in .init.text is a static function.

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices

The lowercase 't' means that set_reset_devices is local and is not exported.
If we simply try to link the tmp.o with the set_reset_devices we end
up with two symbols: one local and one global.

 .section __mcount_loc,"a",@progbits
 .quad set_reset_devices + 0x10

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
                 U set_reset_devices

We still have an undefined reference to set_reset_devices, and if we try
to compile the kernel, we will end up with an undefined reference to
set_reset_devices, or even worst, it could be exported someplace else,
and then we will have a reference to the wrong location.

To handle this case, we make an intermediate step using objcopy.
We convert set_reset_devices into a global exported symbol before linking
it with tmp.o and set it back afterwards.

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices

00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices

Now we have a section in main.o called __mcount_loc that we can place
somewhere in the kernel using vmlinux.ld.S and access it to convert
all these locations that call mcount into nops before starting SMP
and thus, eliminating the need to do this with kstop_machine.

Note, A well documented perl script (scripts/recordmcount.pl) is used
to do all this in one location.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |   8 ++
 kernel/trace/Kconfig              |   8 ++
 scripts/Makefile.build            |   6 +
 scripts/recordmcount.pl           | 280 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+)
 create mode 100755 scripts/recordmcount.pl

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d8e472a09c8..838d9b2a0da1 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -37,6 +37,13 @@
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define MCOUNT_REC()	VMLINUX_SYMBOL(__start_mcount_loc) = .; \
+			*(__mcount_loc)				\
+			VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+#else
+#define MCOUNT_REC()
+#endif
 
 /* .data section */
 #define DATA_DATA							\
@@ -192,6 +199,7 @@
 	/* __*init sections */						\
 	__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {		\
 		*(.ref.rodata)						\
+		MCOUNT_REC()						\
 		DEV_KEEP(init.rodata)					\
 		DEV_KEEP(exit.rodata)					\
 		CPU_KEEP(init.rodata)					\
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cae2637d5e68..14d9505178ca 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -7,6 +7,9 @@ config HAVE_FTRACE
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
@@ -122,6 +125,11 @@ config DYNAMIC_FTRACE
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 277cfe0b7100..463ddcc583ed 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,10 +198,16 @@ cmd_modversions =							\
 	fi;
 endif
 
+ifdef CONFIG_FTRACE_MCOUNT_RECORD
+cmd_record_mcount = scripts/recordmcount.pl "$(ARCH)" \
+	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+endif
+
 define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call echo-cmd,cc_o_c) $(cmd_cc_o_c);				  \
 	$(cmd_modversions)						  \
+	$(cmd_record_mcount)						  \
 	scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' >    \
 	                                              $(dot-target).tmp;  \
 	rm -f $(depfile);						  \
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
new file mode 100755
index 000000000000..44b4b23e91b2
--- /dev/null
+++ b/scripts/recordmcount.pl
@@ -0,0 +1,280 @@
+#!/usr/bin/perl -w
+# (c) 2008, Steven Rostedt <srostedt@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# recordmcount.pl - makes a section called __mcount_loc that holds
+#                   all the offsets to the calls to mcount.
+#
+#
+# What we want to end up with is a section in vmlinux called
+# __mcount_loc that contains a list of pointers to all the
+# call sites in the kernel that call mcount. Later on boot up, the kernel
+# will read this list, save the locations and turn them into nops.
+# When tracing or profiling is later enabled, these locations will then
+# be converted back to pointers to some function.
+#
+# This is no easy feat. This script is called just after the original
+# object is compiled and before it is linked.
+#
+# The references to the call sites are offsets from the section of text
+# that the call site is in. Hence, all functions in a section that
+# has a call site to mcount, will have the offset from the beginning of
+# the section and not the beginning of the function.
+#
+# The trick is to find a way to record the beginning of the section.
+# The way we do this is to look at the first function in the section
+# which will also be the location of that section after final link.
+# e.g.
+#
+#  .section ".text.sched"
+#  .globl my_func
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# Both relocation offsets for the mcounts in the above example will be
+# offset from .text.sched. If we make another file called tmp.s with:
+#
+#  .section __mcount_loc
+#  .quad  my_func + 0x5
+#  .quad  my_func + 0x1b
+#
+# We can then compile this tmp.s into tmp.o, and link it to the original
+# object.
+#
+# But this gets hard if my_func is not globl (a static function).
+# In such a case we have:
+#
+#  .section ".text.sched"
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  .globl my_func
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# If we make the tmp.s the same as above, when we link together with
+# the original object, we will end up with two symbols for my_func:
+# one local, one global.  After final compile, we will end up with
+# an undefined reference to my_func.
+#
+# Since local objects can reference local variables, we need to find
+# a way to make tmp.o reference the local objects of the original object
+# file after it is linked together. To do this, we convert the my_func
+# into a global symbol before linking tmp.o. Then after we link tmp.o
+# we will only have a single symbol for my_func that is global.
+# We can convert my_func back into a local symbol and we are done.
+#
+# Here are the steps we take:
+#
+# 1) Record all the local symbols by using 'nm'
+# 2) Use objdump to find all the call site offsets and sections for
+#    mcount.
+# 3) Compile the list into its own object.
+# 4) Do we have to deal with local functions? If not, go to step 8.
+# 5) Make an object that converts these local functions to global symbols
+#    with objcopy.
+# 6) Link together this new object with the list object.
+# 7) Convert the local functions back to local symbols and rename
+#    the result as the original object.
+#    End.
+# 8) Link the object with the list object.
+# 9) Move the result back to the original object.
+#    End.
+#
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.1';
+
+if ($#ARGV < 6) {
+	print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+	print "version: $V\n";
+	exit(1);
+}
+
+my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+
+$objdump = "objdump" if ((length $objdump) == 0);
+$objcopy = "objcopy" if ((length $objcopy) == 0);
+$cc = "gcc" if ((length $cc) == 0);
+$ld = "ld" if ((length $ld) == 0);
+$nm = "nm" if ((length $nm) == 0);
+$rm = "rm" if ((length $rm) == 0);
+$mv = "mv" if ((length $mv) == 0);
+
+#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
+#    "'$nm' '$rm' '$mv' '$inputfile'\n";
+
+my %locals;
+my %convert;
+
+my $type;
+my $section_regex;	# Find the start of a section
+my $function_regex;	# Find the name of a function (return func name)
+my $mcount_regex;	# Find the call site to mcount (return offset)
+
+if ($arch eq "x86_64") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
+    $type = ".quad";
+} elsif ($arch eq "i386") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
+    $type = ".long";
+} else {
+    die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
+}
+
+my $text_found = 0;
+my $read_function = 0;
+my $opened = 0;
+my $text = "";
+my $mcount_section = "__mcount_loc";
+
+my $dirname;
+my $filename;
+my $prefix;
+my $ext;
+
+if ($inputfile =~ m,^(.*)/([^/]*)$,) {
+    $dirname = $1;
+    $filename = $2;
+} else {
+    $dirname = ".";
+    $filename = $inputfile;
+}
+
+if ($filename =~ m,^(.*)(\.\S),) {
+    $prefix = $1;
+    $ext = $2;
+} else {
+    $prefix = $filename;
+    $ext = "";
+}
+
+my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
+my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
+
+#
+# Step 1: find all the local symbols (static functions).
+#
+open (IN, "$nm $inputfile|") || die "error running $nm";
+while (<IN>) {
+    if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
+	$locals{$1} = 1;
+    }
+}
+close(IN);
+
+#
+# Step 2: find the sections and mcount call sites
+#
+open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
+
+while (<IN>) {
+    # is it a section?
+    if (/$section_regex/) {
+	$read_function = 1;
+	$text_found = 0;
+    # section found, now is this a start of a function?
+    } elsif ($read_function && /$function_regex/) {
+	$read_function = 0;
+	$text_found = 1;
+	$text = $1;
+	# is this function static? If so, note this fact.
+	if (defined $locals{$text}) {
+	    $convert{$text} = 1;
+	}
+    # is this a call site to mcount? If so, print the offset from the section
+    } elsif ($text_found && /$mcount_regex/) {
+	if (!$opened) {
+	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+	    $opened = 1;
+	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+	}
+	print FILE "\t$type $text + 0x$1\n";
+    }
+}
+
+# If we did not find any mcount callers, we are done (do nothing).
+if (!$opened) {
+    exit(0);
+}
+
+close(FILE);
+
+#
+# Step 3: Compile the file that holds the list of call sites to mcount.
+#
+`$cc -o $mcount_o -c $mcount_s`;
+
+my @converts = keys %convert;
+
+#
+# Step 4: Do we have sections that started with local functions?
+#
+if ($#converts >= 0) {
+    my $globallist = "";
+    my $locallist = "";
+
+    foreach my $con (@converts) {
+	$globallist .= " --globalize-symbol $con";
+	$locallist .= " --localize-symbol $con";
+    }
+
+    my $globalobj = $dirname . "/.tmp_gl_" . $filename;
+    my $globalmix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 5: set up each local function as a global
+    #
+    `$objcopy $globallist $inputfile $globalobj`;
+
+    #
+    # Step 6: Link the global version to our list.
+    #
+    `$ld -r $globalobj $mcount_o -o $globalmix`;
+
+    #
+    # Step 7: Convert the local functions back into local symbols
+    #
+    `$objcopy $locallist $globalmix $inputfile`;
+
+    # Remove the temp files
+    `$rm $globalobj $globalmix`;
+
+} else {
+
+    my $mix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 8: Link the object with our list of call sites object.
+    #
+    `$ld -r $inputfile $mcount_o -o $mix`;
+
+    #
+    # Step 9: Move the result back to the original object.
+    #
+    `$mv $mix $inputfile`;
+}
+
+# Clean up the temp files
+`$rm $mcount_o $mcount_s`;
+
+exit(0);
-- 
cgit v1.2.3-55-g7522


From 68bf21aa15c85d2e9b623dcda2b1ed8893275fa1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:08 -0400
Subject: ftrace: mcount call site on boot nops core

This is the infrastructure to the converting the mcount call sites
recorded by the __mcount_loc section into nops on boot. It also allows
for using these sites to enable tracing as normal. When the __mcount_loc
section is used, the "ftraced" kernel thread is disabled.

This uses the current infrastructure to record the mcount call sites
as well as convert them to nops. The mcount function is kept as a stub
on boot up and not converted to the ftrace_record_ip function. We use the
ftrace_record_ip to only record from the table.

This patch does not handle modules. That comes with a later patch.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/ftrace.h |  10 ++++
 include/linux/ftrace.h   |   6 ++
 init/main.c              |   3 +
 kernel/trace/ftrace.c    | 148 +++++++++++++++++++++++++++++++++--------------
 4 files changed, 124 insertions(+), 43 deletions(-)

diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index be0e004ad148..1bb6f9bbe1ab 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -7,6 +7,16 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * call mcount is "e8 <4 byte offset>"
+	 * The addr points to the 4 byte offset and the caller of this
+	 * function wants the pointer to e8. Simply subtract one.
+	 */
+	return addr - 1;
+}
 #endif
 
 #endif /* CONFIG_FTRACE */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index bb384068272e..d4d6ab453b78 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,4 +162,10 @@ static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern void ftrace_init(void);
+#else
+static inline void ftrace_init(void) { }
+#endif
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/init/main.c b/init/main.c
index 3820323c4c84..ded1fae965ab 100644
--- a/init/main.c
+++ b/init/main.c
@@ -60,6 +60,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/ftrace.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -687,6 +688,8 @@ asmlinkage void __init start_kernel(void)
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+	ftrace_init();
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..df96d5990c04 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -792,47 +792,7 @@ static int ftrace_update_code(void)
 	return 1;
 }
 
-static int ftraced(void *ignore)
-{
-	unsigned long usecs;
-
-	while (!kthread_should_stop()) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		/* check once a second */
-		schedule_timeout(HZ);
-
-		if (unlikely(ftrace_disabled))
-			continue;
-
-		mutex_lock(&ftrace_sysctl_lock);
-		mutex_lock(&ftraced_lock);
-		if (!ftraced_suspend && !ftraced_stop &&
-		    ftrace_update_code()) {
-			usecs = nsecs_to_usecs(ftrace_update_time);
-			if (ftrace_update_tot_cnt > 100000) {
-				ftrace_update_tot_cnt = 0;
-				pr_info("hm, dftrace overflow: %lu change%s"
-					" (%lu total) in %lu usec%s\n",
-					ftrace_update_cnt,
-					ftrace_update_cnt != 1 ? "s" : "",
-					ftrace_update_tot_cnt,
-					usecs, usecs != 1 ? "s" : "");
-				ftrace_disabled = 1;
-				WARN_ON_ONCE(1);
-			}
-		}
-		mutex_unlock(&ftraced_lock);
-		mutex_unlock(&ftrace_sysctl_lock);
-
-		ftrace_shutdown_replenish();
-	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 {
 	struct ftrace_page *pg;
 	int cnt;
@@ -859,7 +819,9 @@ static int __init ftrace_dyn_table_alloc(void)
 
 	pg = ftrace_pages = ftrace_pages_start;
 
-	cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+	cnt = num_to_init / ENTRIES_PER_PAGE;
+	pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+		num_to_init, cnt);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -1556,6 +1518,104 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+static int ftrace_convert_nops(unsigned long *start,
+			       unsigned long *end)
+{
+	unsigned long *p;
+	unsigned long addr;
+	unsigned long flags;
+
+	p = start;
+	while (p < end) {
+		addr = ftrace_call_adjust(*p++);
+		ftrace_record_ip(addr);
+		ftrace_shutdown_replenish();
+	}
+
+	/* p is ignored */
+	local_irq_save(flags);
+	__ftrace_update_code(p);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+	unsigned long count, addr, flags;
+	int ret;
+
+	/* Keep the ftrace pointer to the stub */
+	addr = (unsigned long)ftrace_stub;
+
+	local_irq_save(flags);
+	ftrace_dyn_arch_init(&addr);
+	local_irq_restore(flags);
+
+	/* ftrace_dyn_arch_init places the return code in addr */
+	if (addr)
+		goto failed;
+
+	count = __stop_mcount_loc - __start_mcount_loc;
+
+	ret = ftrace_dyn_table_alloc(count);
+	if (ret)
+		goto failed;
+
+	last_ftrace_enabled = ftrace_enabled = 1;
+
+	ret = ftrace_convert_nops(__start_mcount_loc,
+				  __stop_mcount_loc);
+
+	return;
+ failed:
+	ftrace_disabled = 1;
+}
+#else /* CONFIG_FTRACE_MCOUNT_RECORD */
+static int ftraced(void *ignore)
+{
+	unsigned long usecs;
+
+	while (!kthread_should_stop()) {
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* check once a second */
+		schedule_timeout(HZ);
+
+		if (unlikely(ftrace_disabled))
+			continue;
+
+		mutex_lock(&ftrace_sysctl_lock);
+		mutex_lock(&ftraced_lock);
+		if (!ftraced_suspend && !ftraced_stop &&
+		    ftrace_update_code()) {
+			usecs = nsecs_to_usecs(ftrace_update_time);
+			if (ftrace_update_tot_cnt > 100000) {
+				ftrace_update_tot_cnt = 0;
+				pr_info("hm, dftrace overflow: %lu change%s"
+					" (%lu total) in %lu usec%s\n",
+					ftrace_update_cnt,
+					ftrace_update_cnt != 1 ? "s" : "",
+					ftrace_update_tot_cnt,
+					usecs, usecs != 1 ? "s" : "");
+				ftrace_disabled = 1;
+				WARN_ON_ONCE(1);
+			}
+		}
+		mutex_unlock(&ftraced_lock);
+		mutex_unlock(&ftrace_sysctl_lock);
+
+		ftrace_shutdown_replenish();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
 static int __init ftrace_dynamic_init(void)
 {
 	struct task_struct *p;
@@ -1572,7 +1632,7 @@ static int __init ftrace_dynamic_init(void)
 		goto failed;
 	}
 
-	ret = ftrace_dyn_table_alloc();
+	ret = ftrace_dyn_table_alloc(NR_TO_INIT);
 	if (ret)
 		goto failed;
 
@@ -1593,6 +1653,8 @@ static int __init ftrace_dynamic_init(void)
 }
 
 core_initcall(ftrace_dynamic_init);
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
 #else
 # define ftrace_startup()		do { } while (0)
 # define ftrace_shutdown()		do { } while (0)
-- 
cgit v1.2.3-55-g7522


From 90d595fe5ca4b685465c068907e6e554760abea8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:09 -0400
Subject: ftrace: enable mcount recording for modules

This patch enables the loading of the __mcount_section of modules and
changing all the callers of mcount into nops.

The modification is done before the init_module function is called, so
again, we do not need to use kstop_machine to make these changes.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  3 +++
 kernel/module.c        | 11 +++++++++++
 kernel/trace/ftrace.c  |  5 +++++
 3 files changed, 19 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d4d6ab453b78..4936489f9ed8 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -164,8 +164,11 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
+extern void ftrace_init_module(unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
+static inline void
+ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/module.c b/kernel/module.c
index 661d73db786e..d753fd9d83ec 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -47,6 +47,7 @@
 #include <linux/license.h>
 #include <asm/sections.h>
 #include <linux/tracepoint.h>
+#include <linux/ftrace.h>
 
 #if 0
 #define DEBUGP printk
@@ -1834,6 +1835,7 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int markersstringsindex;
 	unsigned int tracepointsindex;
 	unsigned int tracepointsstringsindex;
+	unsigned int mcountindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2124,6 +2126,9 @@ static noinline struct module *load_module(void __user *umod,
 	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__tracepoints_strings");
 
+	mcountindex = find_sec(hdr, sechdrs, secstrings,
+			       "__mcount_loc");
+
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
 		const char *strtab = (char *)sechdrs[strindex].sh_addr;
@@ -2184,6 +2189,12 @@ static noinline struct module *load_module(void __user *umod,
 			mod->tracepoints + mod->num_tracepoints);
 #endif
 	}
+
+	if (mcountindex) {
+		void *mseg = (void *)sechdrs[mcountindex].sh_addr;
+		ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+	}
+
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index df96d5990c04..ea45bb1c0fd6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1541,6 +1541,11 @@ static int ftrace_convert_nops(unsigned long *start,
 	return 0;
 }
 
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+	ftrace_convert_nops(start, end);
+}
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
-- 
cgit v1.2.3-55-g7522


From 29e71abf56cebc5c5a4e184a6eb4360cc58554ad Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:10 -0400
Subject: ftrace: rebuild everything on change to FTRACE_MCOUNT_RECORD

When enabling or disabling CONFIG_FTRACE_MCOUNT_RECORD, we want a full
kernel compile to handle the adding of the __mcount_loc sections.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 75d81f157d2e..ecce4a4ccd5f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -486,4 +486,9 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
+#endif
+
 #endif
-- 
cgit v1.2.3-55-g7522


From e4b2b8866121bd06d5f3d9de0f79a04339ffa252 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:11 -0400
Subject: ftrace: enable using mcount recording on x86

Enable the use of the __mcount_loc infrastructure on x86_64 and i386.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc8351f374fd..5a34c5427a07 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
-- 
cgit v1.2.3-55-g7522


From 0a37605c2261a06d8cafc62dee11374ad676c8c4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 15:45:12 -0400
Subject: ftrace: x86 mcount stub

x86 now sets up the mcount locations through the build and no longer
needs to record the ip when the function is executed. This patch changes
the initial mcount to simply return. There's no need to do any other work.
If the ftrace start up test fails, the original mcount will be what everything
will use, so having this as fast as possible is a good thing.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 14 --------------
 arch/x86/kernel/entry_64.S | 26 --------------------------
 arch/x86/kernel/ftrace.c   | 14 ++------------
 3 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..4e4269c73bb7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	movl 0xc(%esp), %eax
-	subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	popl %edx
-	popl %ecx
-	popl %eax
-
 	ret
 END(mcount)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e1..09e7145484c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
 #ifdef CONFIG_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-
-	subq $0x38, %rsp
-	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-
-	movq 0x38(%rsp), %rdi
-	subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
-	call ftrace_stub
-
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $0x38, %rsp
-
 	retq
 END(mcount)
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fdf..96aadbfedcc6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -112,18 +112,8 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 
 notrace int ftrace_mcount_set(unsigned long *data)
 {
-	unsigned long ip = (long)(&mcount_call);
-	unsigned long *addr = data;
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-	/*
-	 * Replace the mcount stub with a pointer to the
-	 * ip recorder function.
-	 */
-	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, *addr);
-	*addr = ftrace_modify_code(ip, old, new);
-
+	/* mcount is initialized as a nop */
+	*data = 0;
 	return 0;
 }
 
-- 
cgit v1.2.3-55-g7522


From 732f3ca7d4ba3c1be8d051d52302ef441ee7748b Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 18:05:05 -0400
Subject: ftrace: use only 5 byte nops for x86

Mathieu Desnoyers revealed a bug in the original code. The nop that is
used to relpace the mcount caller can be a two part nop. This runs the
risk where a process can be preempted after executing the first nop, but
before the second part of the nop.

The ftrace code calls kstop_machine to keep multiple CPUs from executing
code that is being modified, but it does not protect against a task preempting
in the middle of a two part nop.

If the above preemption happens and the tracer is enabled, after the
kstop_machine runs, all those nops will be calls to the trace function.
If the preempted process that was preempted between the two nops is executed
again, it will execute half of the call to the trace function, and this
might crash the system.

This patch instead uses what both the latest Intel and AMD spec suggests.
That is the P6_NOP5 sequence of "0x0f 0x1f 0x44 0x00 0x00".

Note, some older CPUs and QEMU might fault on this nop, so this nop
is executed with fault handling first. If it detects a fault, it will then
use the code "0x66 0x66 0x66 0x66 0x90". If that faults, it will then
default to a simple "jmp 1f; .byte 0x00 0x00 0x00; 1:". The jmp is
not optimal but will do if the first two can not be executed.

TODO: Examine the cpuid to determine the nop to use.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 68 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 96aadbfedcc6..4151c91254e8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -16,8 +16,8 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
-#include <asm/alternative.h>
 #include <asm/ftrace.h>
+#include <asm/nops.h>
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
@@ -119,13 +119,67 @@ notrace int ftrace_mcount_set(unsigned long *data)
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
-	/* This is running in kstop_machine */
-
-	ftrace_mcount_set(data);
+	extern const unsigned char ftrace_test_p6nop[];
+	extern const unsigned char ftrace_test_nop5[];
+	extern const unsigned char ftrace_test_jmp[];
+	int faulted = 0;
 
-	ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+	/*
+	 * There is no good nop for all x86 archs.
+	 * We will default to using the P6_NOP5, but first we
+	 * will test to make sure that the nop will actually
+	 * work on this CPU. If it faults, we will then
+	 * go to a lesser efficient 5 byte nop. If that fails
+	 * we then just use a jmp as our nop. This isn't the most
+	 * efficient nop, but we can not use a multi part nop
+	 * since we would then risk being preempted in the middle
+	 * of that nop, and if we enabled tracing then, it might
+	 * cause a system crash.
+	 *
+	 * TODO: check the cpuid to determine the best nop.
+	 */
+	asm volatile (
+		"jmp ftrace_test_jmp\n"
+		/* This code needs to stay around */
+		".section .text, \"ax\"\n"
+		"ftrace_test_jmp:"
+		"jmp ftrace_test_p6nop\n"
+		".byte 0x00,0x00,0x00\n"  /* 2 byte jmp + 3 bytes */
+		"ftrace_test_p6nop:"
+		P6_NOP5
+		"jmp 1f\n"
+		"ftrace_test_nop5:"
+		".byte 0x66,0x66,0x66,0x66,0x90\n"
+		"jmp 1f\n"
+		".previous\n"
+		"1:"
+		".section .fixup, \"ax\"\n"
+		"2:	movl $1, %0\n"
+		"	jmp ftrace_test_nop5\n"
+		"3:	movl $2, %0\n"
+		"	jmp 1b\n"
+		".previous\n"
+		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
+		_ASM_EXTABLE(ftrace_test_nop5, 3b)
+		: "=r"(faulted) : "0" (faulted));
+
+	switch (faulted) {
+	case 0:
+		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+		ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+		break;
+	case 1:
+		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+		ftrace_nop = (unsigned long *)ftrace_test_nop5;
+		break;
+	case 2:
+		pr_info("ftrace: converting mcount calls to jmp 1f\n");
+		ftrace_nop = (unsigned long *)ftrace_test_jmp;
+		break;
+	}
+
+	/* The return code is retured via data */
+	*(unsigned long *)data = 0;
 
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From a9fdda33cd7c7519b082e37538fe790f9ff684bb Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 22:47:17 -0400
Subject: ftrace: do not show freed records in available_filter_functions

Seems that freed records can appear in the available_filter_functions list.
This patch fixes that.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ea45bb1c0fd6..8affb6d00ec1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -872,15 +872,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+		if ((rec->flags & FTRACE_FL_FREE) ||
+
+		    (!(iter->flags & FTRACE_ITER_FAILURES) &&
 		     (rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
-		     (!(rec->flags & FTRACE_FL_FAILED) ||
-		      (rec->flags & FTRACE_FL_FREE))) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(rec->flags & FTRACE_FL_FILTER)) ||
+		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
-- 
cgit v1.2.3-55-g7522


From 28614889bcb2558a47d02d52394b7fd9795a9547 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 22:47:18 -0400
Subject: ftrace: move notrace to compiler.h

The notrace define belongs in compiler.h so that it can be used in
init.h

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler.h | 2 ++
 include/linux/linkage.h  | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8322141ee480..98115d9d04da 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # error Sorry, your compiler is too old/not recognized.
 #endif
 
+#define notrace __attribute__((no_instrument_function))
+
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
  */
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 56ba37394656..9fd1f859021b 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/linkage.h>
 
-#define notrace __attribute__((no_instrument_function))
-
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
-- 
cgit v1.2.3-55-g7522


From fed1939c64d2288938fdc1c367d49082da65e195 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 14 Aug 2008 22:47:19 -0400
Subject: ftrace: remove old pointers to mcount

When a mcount pointer is recorded into a table, it is used to add or
remove calls to mcount (replacing them with nops). If the code is removed
via removing a module, the pointers still exist.  At modifying the code
a check is always made to make sure the code being replaced is the code
expected. In-other-words, the code being replaced is compared to what
it is expected to be before being replaced.

There is a very small chance that the code being replaced just happens
to look like code that calls mcount (very small since the call to mcount
is relative). To remove this chance, this patch adds ftrace_release to
allow module unloading to remove the pointers to mcount within the module.

Another change for init calls is made to not trace calls marked with
__init. The tracing can not be started until after init is done anyway.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  2 ++
 include/linux/init.h   |  2 +-
 kernel/module.c        | 12 ++++++++----
 kernel/trace/ftrace.c  | 32 ++++++++++++++++++++++++++++++--
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4936489f9ed8..6b232a2460c0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,10 +165,12 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+extern void ftrace_release(void *start, unsigned long size);
 #else
 static inline void ftrace_init(void) { }
 static inline void
 ftrace_init_module(unsigned long *start, unsigned long *end) { }
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index 93538b696e3d..27f61f6b3cb9 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,7 +40,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text) __cold
+#define __init		__section(.init.text) __cold notrace
 #define __initdata	__section(.init.data)
 #define __initconst	__section(.init.rodata)
 #define __exitdata	__section(.exit.data)
diff --git a/kernel/module.c b/kernel/module.c
index d753fd9d83ec..7576c2d9462f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1431,6 +1431,9 @@ static void free_module(struct module *mod)
 	/* Module unload stuff */
 	module_unload_free(mod);
 
+	/* release any pointers to mcount in this module */
+	ftrace_release(mod->module_core, mod->core_size);
+
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1839,6 +1842,7 @@ static noinline struct module *load_module(void __user *umod,
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+	void *mseg;
 	struct exception_table_entry *extable;
 	mm_segment_t old_fs;
 
@@ -2190,10 +2194,9 @@ static noinline struct module *load_module(void __user *umod,
 #endif
 	}
 
-	if (mcountindex) {
-		void *mseg = (void *)sechdrs[mcountindex].sh_addr;
-		ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
-	}
+	/* sechdrs[0].sh_size is always zero */
+	mseg = (void *)sechdrs[mcountindex].sh_addr;
+	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2264,6 +2267,7 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
+	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 	module_free(mod, mod->module_init);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8affb6d00ec1..eadd0eaea9b6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -294,13 +294,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-	/* no locking, only called from kstop_machine */
-
 	rec->ip = (unsigned long)ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
 }
 
+void ftrace_release(void *start, unsigned long size)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = s + size;
+	int i;
+
+	if (!start)
+		return;
+
+	/* No interrupt should call this */
+	spin_lock(&ftrace_lock);
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		for (i = 0; i < pg->index; i++) {
+			rec = &pg->records[i];
+
+			if ((rec->ip >= s) && (rec->ip < e))
+				ftrace_free_rec(rec);
+		}
+	}
+	spin_unlock(&ftrace_lock);
+
+}
+
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -1527,7 +1551,9 @@ static int ftrace_convert_nops(unsigned long *start,
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
+		spin_lock(&ftrace_lock);
 		ftrace_record_ip(addr);
+		spin_unlock(&ftrace_lock);
 		ftrace_shutdown_replenish();
 	}
 
@@ -1541,6 +1567,8 @@ static int ftrace_convert_nops(unsigned long *start,
 
 void ftrace_init_module(unsigned long *start, unsigned long *end)
 {
+	if (start == end)
+		return;
 	ftrace_convert_nops(start, end);
 }
 
-- 
cgit v1.2.3-55-g7522


From 2e2ca155cd2213b4f398031180fb3d399d5b7db9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 1 Aug 2008 12:26:40 -0400
Subject: ftrace: new continue entry - separate out from trace_entry

Some tracers will need to work with more than one entry. In order to do this
the trace_entry structure was split into two fields. One for the start of
all entries, and one to continue an existing entry.

The trace_entry structure now has a "field" entry that consists of the previous
content of the trace_entry, and a "cont" entry that is just a string buffer
the size of the "field" entry.

Thanks to Andrew Morton for suggesting this idea.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 267 +++++++++++++++++++++--------------------
 kernel/trace/trace.h           |  17 ++-
 kernel/trace/trace_mmiotrace.c |  12 +-
 3 files changed, 160 insertions(+), 136 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..76dfe6d2466c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -817,10 +817,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	pc = preempt_count();
 
-	entry->preempt_count	= pc & 0xff;
-	entry->pid		= (tsk) ? tsk->pid : 0;
-	entry->t		= ftrace_now(raw_smp_processor_id());
-	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+	entry->field.preempt_count	= pc & 0xff;
+	entry->field.pid		= (tsk) ? tsk->pid : 0;
+	entry->field.t			= ftrace_now(raw_smp_processor_id());
+	entry->field.flags =
+		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -835,11 +836,11 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_FN;
-	entry->fn.ip		= ip;
-	entry->fn.parent_ip	= parent_ip;
+	entry->type			= TRACE_FN;
+	entry->field.fn.ip		= ip;
+	entry->field.fn.parent_ip	= parent_ip;
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
 }
@@ -862,10 +863,10 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
 
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_RW;
-	entry->mmiorw		= *rw;
+	entry->type			= TRACE_MMIO_RW;
+	entry->field.mmiorw		= *rw;
 
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -882,10 +883,10 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
 
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_MMIO_MAP;
-	entry->mmiomap		= *map;
+	entry->type			= TRACE_MMIO_MAP;
+	entry->field.mmiomap		= *map;
 
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -909,12 +910,12 @@ void __trace_stack(struct trace_array *tr,
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_STACK;
 
-	memset(&entry->stack, 0, sizeof(entry->stack));
+	memset(&entry->field.stack, 0, sizeof(entry->field.stack));
 
 	trace.nr_entries	= 0;
 	trace.max_entries	= FTRACE_STACK_ENTRIES;
 	trace.skip		= skip;
-	trace.entries		= entry->stack.caller;
+	trace.entries		= entry->field.stack.caller;
 
 	save_stack_trace(&trace);
 }
@@ -930,12 +931,12 @@ __trace_special(void *__tr, void *__data,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
-	entry->type		= TRACE_SPECIAL;
-	entry->special.arg1	= arg1;
-	entry->special.arg2	= arg2;
-	entry->special.arg3	= arg3;
+	entry->type			= TRACE_SPECIAL;
+	entry->field.special.arg1	= arg1;
+	entry->field.special.arg2	= arg2;
+	entry->field.special.arg3	= arg3;
 	__trace_stack(tr, data, irq_flags, 4);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -955,15 +956,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	entry				= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_CTX;
-	entry->ctx.prev_pid	= prev->pid;
-	entry->ctx.prev_prio	= prev->prio;
-	entry->ctx.prev_state	= prev->state;
-	entry->ctx.next_pid	= next->pid;
-	entry->ctx.next_prio	= next->prio;
-	entry->ctx.next_state	= next->state;
+	entry->type			= TRACE_CTX;
+	entry->field.ctx.prev_pid	= prev->pid;
+	entry->field.ctx.prev_prio	= prev->prio;
+	entry->field.ctx.prev_state	= prev->state;
+	entry->field.ctx.next_pid	= next->pid;
+	entry->field.ctx.next_prio	= next->prio;
+	entry->field.ctx.next_state	= next->state;
 	__trace_stack(tr, data, flags, 5);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -984,12 +985,12 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_WAKE;
-	entry->ctx.prev_pid	= curr->pid;
-	entry->ctx.prev_prio	= curr->prio;
-	entry->ctx.prev_state	= curr->state;
-	entry->ctx.next_pid	= wakee->pid;
-	entry->ctx.next_prio	= wakee->prio;
-	entry->ctx.next_state	= wakee->state;
+	entry->field.ctx.prev_pid	= curr->pid;
+	entry->field.ctx.prev_prio	= curr->prio;
+	entry->field.ctx.prev_state	= curr->state;
+	entry->field.ctx.next_pid	= wakee->pid;
+	entry->field.ctx.next_prio	= wakee->prio;
+	entry->field.ctx.next_state	= wakee->state;
 	__trace_stack(tr, data, flags, 6);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1118,7 +1119,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
-		if (ent && (!next || ent->t < next->t)) {
+		if (ent && (!next || ent->field.t < next->field.t)) {
 			next = ent;
 			next_cpu = cpu;
 		}
@@ -1422,19 +1423,20 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 static void
 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 {
+	struct trace_field *field = &entry->field;
 	int hardirq, softirq;
 	char *comm;
 
-	comm = trace_find_cmdline(entry->pid);
+	comm = trace_find_cmdline(field->pid);
 
-	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
+	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
 	trace_seq_printf(s, "%d", cpu);
 	trace_seq_printf(s, "%c%c",
-			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
-			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
 
-	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
-	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+	hardirq = field->flags & TRACE_FLAG_HARDIRQ;
+	softirq = field->flags & TRACE_FLAG_SOFTIRQ;
 	if (hardirq && softirq) {
 		trace_seq_putc(s, 'H');
 	} else {
@@ -1448,8 +1450,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 		}
 	}
 
-	if (entry->preempt_count)
-		trace_seq_printf(s, "%x", entry->preempt_count);
+	if (field->preempt_count)
+		trace_seq_printf(s, "%x", field->preempt_count);
 	else
 		trace_seq_puts(s, ".");
 }
@@ -1479,6 +1481,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	struct trace_entry *next_entry = find_next_entry(iter, NULL);
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
+	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
 	char *comm;
@@ -1488,17 +1491,17 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (!next_entry)
 		next_entry = entry;
-	rel_usecs = ns2usecs(next_entry->t - entry->t);
-	abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
+	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
 
 	if (verbose) {
-		comm = trace_find_cmdline(entry->pid);
+		comm = trace_find_cmdline(field->pid);
 		trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
-				 entry->pid, cpu, entry->flags,
-				 entry->preempt_count, trace_idx,
-				 ns2usecs(entry->t),
+				 field->pid, cpu, field->flags,
+				 field->preempt_count, trace_idx,
+				 ns2usecs(field->t),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
 				 rel_usecs % 1000);
@@ -1508,41 +1511,42 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	switch (entry->type) {
 	case TRACE_FN:
-		seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+		seq_print_ip_sym(s, field->fn.ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(entry->fn.parent_ip))
+		if (kretprobed(field->fn.parent_ip))
 			trace_seq_puts(s, KRETPROBE_MSG);
 		else
-			seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+			seq_print_ip_sym(s, field->fn.parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 
-		state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
+		state = field->ctx.prev_state ?
+			__ffs(field->ctx.prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-		comm = trace_find_cmdline(entry->ctx.next_pid);
+		comm = trace_find_cmdline(field->ctx.next_pid);
 		trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
-				 entry->ctx.prev_pid,
-				 entry->ctx.prev_prio,
+				 field->ctx.prev_pid,
+				 field->ctx.prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 entry->ctx.next_pid,
-				 entry->ctx.next_prio,
+				 field->ctx.next_pid,
+				 field->ctx.next_prio,
 				 T, comm);
 		break;
 	case TRACE_SPECIAL:
 		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		break;
 	case TRACE_STACK:
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
 				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+			seq_print_ip_sym(s, field->stack.caller[i], sym_flags);
 		}
 		trace_seq_puts(s, "\n");
 		break;
@@ -1557,6 +1561,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *entry;
+	struct trace_field *field;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
@@ -1566,14 +1571,15 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	int i;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	comm = trace_find_cmdline(iter->ent->pid);
+	comm = trace_find_cmdline(iter->ent->field.pid);
 
-	t = ns2usecs(entry->t);
+	t = ns2usecs(field->t);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
-	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
@@ -1585,18 +1591,19 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN:
-		ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+		ret = seq_print_ip_sym(s, field->fn.ip, sym_flags);
 		if (!ret)
 			return 0;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						entry->fn.parent_ip) {
+						field->fn.parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return 0;
-			if (kretprobed(entry->fn.parent_ip))
+			if (kretprobed(field->fn.parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
-				ret = seq_print_ip_sym(s, entry->fn.parent_ip,
+				ret = seq_print_ip_sym(s,
+						       field->fn.parent_ip,
 						       sym_flags);
 			if (!ret)
 				return 0;
@@ -1607,26 +1614,26 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+				       field->ctx.prev_pid,
+				       field->ctx.prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->ctx.next_pid,
+				       field->ctx.next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
 	case TRACE_SPECIAL:
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		if (!ret)
 			return 0;
 		break;
@@ -1637,7 +1644,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				if (!ret)
 					return 0;
 			}
-			ret = seq_print_ip_sym(s, entry->stack.caller[i],
+			ret = seq_print_ip_sym(s, field->stack.caller[i],
 					       sym_flags);
 			if (!ret)
 				return 0;
@@ -1654,37 +1661,40 @@ static int print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_field *field;
 	int ret;
 	int S, T;
 
 	entry = iter->ent;
+	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		entry->pid, iter->cpu, entry->t);
+		field->pid, iter->cpu, field->t);
 	if (!ret)
 		return 0;
 
 	switch (entry->type) {
 	case TRACE_FN:
 		ret = trace_seq_printf(s, "%x %x\n",
-					entry->fn.ip, entry->fn.parent_ip);
+					field->fn.ip,
+					field->fn.parent_ip);
 		if (!ret)
 			return 0;
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
 		ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
-				       entry->ctx.prev_pid,
-				       entry->ctx.prev_prio,
+				       field->ctx.prev_pid,
+				       field->ctx.prev_prio,
 				       S,
-				       entry->ctx.next_pid,
-				       entry->ctx.next_prio,
+				       field->ctx.next_pid,
+				       field->ctx.next_prio,
 				       T);
 		if (!ret)
 			return 0;
@@ -1692,9 +1702,9 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 entry->special.arg1,
-				 entry->special.arg2,
-				 entry->special.arg3);
+				 field->special.arg1,
+				 field->special.arg2,
+				 field->special.arg3);
 		if (!ret)
 			return 0;
 		break;
@@ -1719,40 +1729,41 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
 	struct trace_entry *entry;
+	struct trace_field *field;
 	int S, T;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
+	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-	SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+	SEQ_PUT_HEX_FIELD_RET(s, field->t);
 
 	switch (entry->type) {
 	case TRACE_FN:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip);
 		break;
 	case TRACE_CTX:
 	case TRACE_WAKE:
-		S = entry->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.prev_state] : 'X';
-		T = entry->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[entry->ctx.next_state] : 'X';
+		S = field->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.prev_state] : 'X';
+		T = field->ctx.next_state < sizeof(state_to_char) ?
+			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
 		break;
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2);
+		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3);
 		break;
 	}
 	SEQ_PUT_FIELD_RET(s, newline);
@@ -1764,31 +1775,33 @@ static int print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_field *field;
 
 	entry = iter->ent;
+	field = &entry->field;
 
-	SEQ_PUT_FIELD_RET(s, entry->pid);
-	SEQ_PUT_FIELD_RET(s, entry->cpu);
-	SEQ_PUT_FIELD_RET(s, entry->t);
+	SEQ_PUT_FIELD_RET(s, field->pid);
+	SEQ_PUT_FIELD_RET(s, field->cpu);
+	SEQ_PUT_FIELD_RET(s, field->t);
 
 	switch (entry->type) {
 	case TRACE_FN:
-		SEQ_PUT_FIELD_RET(s, entry->fn.ip);
-		SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+		SEQ_PUT_FIELD_RET(s, field->fn.ip);
+		SEQ_PUT_FIELD_RET(s, field->fn.parent_ip);
 		break;
 	case TRACE_CTX:
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
-		SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio);
+		SEQ_PUT_FIELD_RET(s, field->ctx.prev_state);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_pid);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_prio);
+		SEQ_PUT_FIELD_RET(s, field->ctx.next_state);
 		break;
 	case TRACE_SPECIAL:
 	case TRACE_STACK:
-		SEQ_PUT_FIELD_RET(s, entry->special.arg1);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg2);
-		SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+		SEQ_PUT_FIELD_RET(s, field->special.arg1);
+		SEQ_PUT_FIELD_RET(s, field->special.arg2);
+		SEQ_PUT_FIELD_RET(s, field->special.arg3);
 		break;
 	}
 	return 1;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..6ddd6a6556cf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -61,13 +61,12 @@ struct stack_entry {
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
+ * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
  *
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
-struct trace_entry {
-	char			type;
+struct trace_field {
 	char			cpu;
 	char			flags;
 	char			preempt_count;
@@ -83,6 +82,18 @@ struct trace_entry {
 	};
 };
 
+struct trace_field_cont {
+	char				buf[sizeof(struct trace_field)];
+};
+
+struct trace_entry {
+	char 				type;
+	union {
+		struct trace_field	field;
+		struct trace_field_cont	cont;
+	};
+};
+
 #define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
 
 /*
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..9b7a936f4b1f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -174,14 +174,14 @@ print_out:
 static int mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_rw *rw	= &entry->mmiorw;
+	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(entry->field.t);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->mmiorw.opcode) {
+	switch (entry->field.mmiorw.opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
 			"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -216,14 +216,14 @@ static int mmio_print_rw(struct trace_iterator *iter)
 static int mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= &entry->mmiomap;
+	struct mmiotrace_map *m	= &entry->field.mmiomap;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->t);
+	unsigned long long t	= ns2usecs(entry->field.t);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->mmiorw.opcode) {
+	switch (entry->field.mmiorw.opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
 			"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
-- 
cgit v1.2.3-55-g7522


From dd0e545f061f90099a3dcc13aa77e29c6295cf23 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 1 Aug 2008 12:26:41 -0400
Subject: ftrace: printk formatting infrastructure

This patch adds a feature that can help kernel developers debug their
code using ftrace.

  int ftrace_printk(const char *fmt, ...);

This records into the ftrace buffer using printf formatting. The entry
size in the buffers are still a fixed length. A new type has been added
that allows for more entries to be used for a single recording.

The start of the print is still the same as the other entries.

It returns the number of characters written to the ftrace buffer.

For example:

Having a module with the following code:

static int __init ftrace_print_test(void)
{
        ftrace_printk("jiffies are %ld\n", jiffies);
        return 0;
}

Gives me:

  insmod-5441  3...1 7569us : ftrace_print_test: jiffies are 4296626666

for the latency_trace file and:

          insmod-5441  [03]  1959.370498: ftrace_print_test jiffies are 4296626666

for the trace file.

Note: Only the infrastructure should go into the kernel. It is to help
facilitate debugging for other kernel developers. Calls to ftrace_printk
is not intended to be left in the kernel, and should be frowned upon just
like scattering printks around in the code.

But having this easily at your fingertips helps the debugging go faster
and bugs be solved quicker.

Maybe later on, we can hook this with markers and have their printf format
be sucked into ftrace output.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h        |  10 ++
 kernel/trace/trace.c          | 273 +++++++++++++++++++++++++++++++++++++-----
 kernel/trace/trace.h          |  11 ++
 kernel/trace/trace_selftest.c |  11 +-
 4 files changed, 272 insertions(+), 33 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6b232a2460c0..f53b975e32fa 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -157,9 +157,18 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x)
+extern int
+__ftrace_printk(unsigned long ip, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+static inline int
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
@@ -173,4 +182,5 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 76dfe6d2466c..a917bea82715 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -197,12 +197,14 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
  *  NEED_RESCED - reschedule is requested
  *  HARDIRQ	- inside an interrupt handler
  *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
  */
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
 	TRACE_FLAG_NEED_RESCHED		= 0x02,
 	TRACE_FLAG_HARDIRQ		= 0x04,
 	TRACE_FLAG_SOFTIRQ		= 0x08,
+	TRACE_FLAG_CONT			= 0x10,
 };
 
 /*
@@ -1074,6 +1076,7 @@ enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 };
 
+/* Return the current entry.  */
 static struct trace_entry *
 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 		struct trace_iterator *iter, int cpu)
@@ -1104,8 +1107,58 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 	return &array[iter->next_page_idx[cpu]];
 }
 
+/* Increment the index counter of an iterator by one */
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+{
+	iter->idx++;
+	iter->next_idx[cpu]++;
+	iter->next_page_idx[cpu]++;
+
+	if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
+		struct trace_array_cpu *data = iter->tr->data[cpu];
+
+		iter->next_page_idx[cpu] = 0;
+		iter->next_page[cpu] =
+			trace_next_list(data, iter->next_page[cpu]);
+	}
+}
+
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
+		 struct trace_iterator *iter, int cpu)
+{
+	struct list_head *next_page;
+	struct trace_entry *ent;
+	int idx, next_idx, next_page_idx;
+
+	ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+	if (likely(!ent || ent->type != TRACE_CONT))
+		return ent;
+
+	/* save the iterator details */
+	idx		= iter->idx;
+	next_idx	= iter->next_idx[cpu];
+	next_page_idx	= iter->next_page_idx[cpu];
+	next_page	= iter->next_page[cpu];
+
+	/* find a real entry */
+	do {
+		trace_iterator_increment(iter, cpu);
+		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+	} while (ent && ent->type != TRACE_CONT);
+
+	/* reset the iterator */
+	iter->idx			= idx;
+	iter->next_idx[cpu]		= next_idx;
+	iter->next_page_idx[cpu]	= next_page_idx;
+	iter->next_page[cpu]		= next_page;
+
+	return ent;
+}
+
+static struct trace_entry *
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
 {
 	struct trace_array *tr = iter->tr;
 	struct trace_entry *ent, *next = NULL;
@@ -1115,7 +1168,23 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	for_each_tracing_cpu(cpu) {
 		if (!head_page(tr->data[cpu]))
 			continue;
+
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+		if (ent && ent->type == TRACE_CONT) {
+			struct trace_array_cpu *data = tr->data[cpu];
+
+			if (!inc)
+				ent = trace_entry_next(tr, data, iter, cpu);
+			else {
+				while (ent && ent->type == TRACE_CONT) {
+					trace_iterator_increment(iter, cpu);
+					ent = trace_entry_idx(tr, tr->data[cpu],
+							      iter, cpu);
+				}
+			}
+		}
+
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
@@ -1131,25 +1200,39 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	return next;
 }
 
-static void trace_iterator_increment(struct trace_iterator *iter)
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 {
-	iter->idx++;
-	iter->next_idx[iter->cpu]++;
-	iter->next_page_idx[iter->cpu]++;
+	return __find_next_entry(iter, ent_cpu, 0);
+}
+
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
+{
+	struct trace_entry *next;
+	int next_cpu = -1;
 
-	if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
-		struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+	next = __find_next_entry(iter, &next_cpu, 1);
 
-		iter->next_page_idx[iter->cpu] = 0;
-		iter->next_page[iter->cpu] =
-			trace_next_list(data, iter->next_page[iter->cpu]);
-	}
+	iter->prev_ent = iter->ent;
+	iter->prev_cpu = iter->cpu;
+
+	iter->ent = next;
+	iter->cpu = next_cpu;
+
+	if (next)
+		trace_iterator_increment(iter, iter->cpu);
+
+	return next ? iter : NULL;
 }
 
 static void trace_consume(struct trace_iterator *iter)
 {
 	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+	struct trace_entry *ent;
 
+ again:
 	data->trace_tail_idx++;
 	if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
 		data->trace_tail = trace_next_page(data, data->trace_tail);
@@ -1160,25 +1243,11 @@ static void trace_consume(struct trace_iterator *iter)
 	if (data->trace_head == data->trace_tail &&
 	    data->trace_head_idx == data->trace_tail_idx)
 		data->trace_idx = 0;
-}
-
-static void *find_next_entry_inc(struct trace_iterator *iter)
-{
-	struct trace_entry *next;
-	int next_cpu = -1;
-
-	next = find_next_entry(iter, &next_cpu);
-
-	iter->prev_ent = iter->ent;
-	iter->prev_cpu = iter->cpu;
-
-	iter->ent = next;
-	iter->cpu = next_cpu;
-
-	if (next)
-		trace_iterator_increment(iter);
 
-	return next ? iter : NULL;
+	ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
+			      iter, iter->cpu);
+	if (ent && ent->type == TRACE_CONT)
+		goto again;
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1473,6 +1542,26 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
+static void
+trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+	struct trace_array *tr = iter->tr;
+	struct trace_array_cpu *data = tr->data[iter->cpu];
+	struct trace_entry *ent;
+
+	ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	if (!ent || ent->type != TRACE_CONT) {
+		trace_seq_putc(s, '\n');
+		return;
+	}
+
+	do {
+		trace_seq_printf(s, "%s", ent->cont.buf);
+		trace_iterator_increment(iter, iter->cpu);
+		ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	} while (ent && ent->type == TRACE_CONT);
+}
+
 static int
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
@@ -1491,6 +1580,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (!next_entry)
 		next_entry = entry;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
 	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
 
@@ -1550,6 +1643,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		}
 		trace_seq_puts(s, "\n");
 		break;
+	case TRACE_PRINT:
+		seq_print_ip_sym(s, field->print.ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1571,6 +1670,10 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	int i;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	comm = trace_find_cmdline(iter->ent->field.pid);
@@ -1653,6 +1756,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	case TRACE_PRINT:
+		seq_print_ip_sym(s, field->print.ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	}
 	return 1;
 }
@@ -1666,6 +1775,10 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	int S, T;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
@@ -1708,6 +1821,12 @@ static int print_raw_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	case TRACE_PRINT:
+		trace_seq_printf(s, "# %lx %s",
+				 field->print.ip, field->print.buf);
+		if (field->flags && TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
 	}
 	return 1;
 }
@@ -1733,6 +1852,10 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	int S, T;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
@@ -1778,6 +1901,10 @@ static int print_bin_fmt(struct trace_iterator *iter)
 	struct trace_field *field;
 
 	entry = iter->ent;
+
+	if (entry->type == TRACE_CONT)
+		return 1;
+
 	field = &entry->field;
 
 	SEQ_PUT_FIELD_RET(s, field->pid);
@@ -2943,6 +3070,94 @@ static __init void tracer_init_debugfs(void)
 #endif
 }
 
+#define TRACE_BUF_SIZE 1024
+#define TRACE_PRINT_BUF_SIZE \
+	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
+#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt - the printf format for printing.
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please reframe from leaving ftrace_printks scattered around in
+ * your code.
+ */
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+	struct trace_array *tr = &global_trace;
+	static DEFINE_SPINLOCK(trace_buf_lock);
+	static char trace_buf[TRACE_BUF_SIZE];
+	struct trace_array_cpu *data;
+	struct trace_entry *entry;
+	unsigned long flags;
+	long disabled;
+	va_list ap;
+	int cpu, len = 0, write, written = 0;
+
+	if (likely(!ftrace_function_enabled))
+		return 0;
+
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (unlikely(disabled != 1 || !ftrace_function_enabled))
+		goto out;
+
+	spin_lock(&trace_buf_lock);
+	va_start(ap, fmt);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap);
+	va_end(ap);
+
+	len = min(len, TRACE_BUF_SIZE-1);
+	trace_buf[len] = 0;
+
+	__raw_spin_lock(&data->lock);
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, flags);
+	entry->type			= TRACE_PRINT;
+	entry->field.print.ip		= ip;
+
+	write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1));
+
+	memcpy(&entry->field.print.buf, trace_buf, write);
+	entry->field.print.buf[write] = 0;
+	written = write;
+
+	if (written != len)
+		entry->field.flags |= TRACE_FLAG_CONT;
+
+	while (written != len) {
+		entry = tracing_get_trace_entry(tr, data);
+
+		entry->type = TRACE_CONT;
+		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
+		memcpy(&entry->cont.buf, trace_buf+written, write);
+		entry->cont.buf[write] = 0;
+		written += write;
+	}
+	__raw_spin_unlock(&data->lock);
+
+	spin_unlock(&trace_buf_lock);
+
+ out:
+	atomic_dec(&data->disabled);
+	local_irq_restore(flags);
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
+
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ddd6a6556cf..50b6d7a6f01a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,7 +13,9 @@ enum trace_type {
 	TRACE_FN,
 	TRACE_CTX,
 	TRACE_WAKE,
+	TRACE_CONT,
 	TRACE_STACK,
+	TRACE_PRINT,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -60,6 +62,14 @@ struct stack_entry {
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
+/*
+ * ftrace_printk entry:
+ */
+struct print_entry {
+	unsigned long		ip;
+	char			buf[];
+};
+
 /*
  * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
@@ -77,6 +87,7 @@ struct trace_field {
 		struct ctx_switch_entry		ctx;
 		struct special_entry		special;
 		struct stack_entry		stack;
+		struct print_entry		print;
 		struct mmiotrace_rw		mmiorw;
 		struct mmiotrace_map		mmiomap;
 	};
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..630715bbd572 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,7 +9,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_FN:
 	case TRACE_CTX:
 	case TRACE_WAKE:
+	case TRACE_CONT:
 	case TRACE_STACK:
+	case TRACE_PRINT:
 	case TRACE_SPECIAL:
 		return 1;
 	}
@@ -120,11 +122,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 					   struct trace_array *tr,
 					   int (*func)(void))
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
 	char *func_name;
+	int ret;
 
 	/* The ftrace test PASSED */
 	printk(KERN_CONT "PASSED\n");
@@ -157,6 +159,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	/* enable tracing */
 	tr->ctrl = 1;
 	trace->init(tr);
+
 	/* Sleep for a 1/10 of a second */
 	msleep(100);
 
@@ -212,10 +215,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 int
 trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 {
-	unsigned long count;
-	int ret;
 	int save_ftrace_enabled = ftrace_enabled;
 	int save_tracer_enabled = tracer_enabled;
+	unsigned long count;
+	int ret;
 
 	/* make sure msleep has been recorded */
 	msleep(1);
-- 
cgit v1.2.3-55-g7522


From 2f2c99dba2398ef7d9c21f7c793180a50e68b1f0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 1 Aug 2008 16:45:49 -0400
Subject: ftrace: ftrace_printk doc moved

Based on Randy Dunlap's suggestion, the ftrace_printk kernel-doc belongs
with the ftrace_printk macro that should be used. Not with the
__ftrace_printk internal function.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 19 ++++++++++++++++++-
 kernel/trace/trace.c   | 16 ----------------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f53b975e32fa..018af16bce5c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -157,7 +157,24 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-# define ftrace_printk(x...) __ftrace_printk(_THIS_IP_, x)
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt: the printf format for printing
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving ftrace_printks scattered around in
+ * your code.
+ */
+# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
 extern int
 __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a917bea82715..2597e7e49c35 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3075,22 +3075,6 @@ static __init void tracer_init_debugfs(void)
 	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
 #define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
 
-/**
- * ftrace_printk - printf formatting in the ftrace buffer
- * @fmt - the printf format for printing.
- *
- * Note: __ftrace_printk is an internal function for ftrace_printk and
- *       the @ip is passed in via the ftrace_printk macro.
- *
- * This function allows a kernel developer to debug fast path sections
- * that printk is not appropriate for. By scattering in various
- * printk like tracing in the code, a developer can quickly see
- * where problems are occurring.
- *
- * This is intended as a debugging tool for the developer only.
- * Please reframe from leaving ftrace_printks scattered around in
- * your code.
- */
 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
 	struct trace_array *tr = &global_trace;
-- 
cgit v1.2.3-55-g7522


From 3f5a54e371ca20b119b73704f6c01b71295c1714 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 30 Jul 2008 22:36:46 -0400
Subject: ftrace: dump out ftrace buffers to console on panic

At OLS I had a lot of interest to be able to have the ftrace buffers
dumped on panic.  Usually one would expect to uses kexec and examine
the buffers after a new kernel is loaded. But sometimes the resources
do not permit kdump and kexec, so having an option to still see the
sequence of events up to the crash is very advantageous.

This patch adds the option to have the ftrace buffers dumped to the
console in the latency_trace format on a panic. When the option is set,
the default entries per CPU buffer are lowered to 16384, since the writing
to the serial (if that is the console) may take an awful long time
otherwise.

[
 Changes since -v1:
  Got alpine to send correctly (as well as spell check working).
  Removed config option.
  Moved the static variables into ftrace_dump itself.
  Gave printk a log level.
]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |   2 +
 kernel/trace/trace.c   | 175 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 018af16bce5c..f7fb92045bf0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -178,6 +178,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
 extern int
 __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
+extern void ftrace_dump(void);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
@@ -186,6 +187,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
 {
 	return 0;
 }
+static inline void ftrace_dump(void) { }
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2597e7e49c35..97513c8ecd67 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
 #include <linux/utsrelease.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/notifier.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -22,6 +23,7 @@
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -103,8 +105,15 @@ int				ftrace_function_enabled;
  * trace_nr_entries is the number of entries that is allocated
  * for a buffer. Note, the number of entries is always rounded
  * to ENTRIES_PER_PAGE.
+ *
+ * This number is purposely set to a low number of 16384.
+ * If the dump on oops happens, it will be much appreciated
+ * to not have to wait for all that output. Anyway this can be
+ * boot time and run time configurable.
  */
-static unsigned long		trace_nr_entries = 65536UL;
+#define TRACE_ENTRIES_DEFAULT	16384UL
+
+static unsigned long		trace_nr_entries = TRACE_ENTRIES_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer		*trace_types __read_mostly;
@@ -3142,6 +3151,165 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(__ftrace_printk);
 
+static int trace_panic_handler(struct notifier_block *this,
+			       unsigned long event, void *unused)
+{
+	ftrace_dump();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block trace_panic_notifier = {
+	.notifier_call  = trace_panic_handler,
+	.next           = NULL,
+	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
+};
+
+static int trace_die_handler(struct notifier_block *self,
+			     unsigned long val,
+			     void *data)
+{
+	switch (val) {
+	case DIE_OOPS:
+		ftrace_dump();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block trace_die_notifier = {
+	.notifier_call = trace_die_handler,
+	.priority = 200
+};
+
+/*
+ * printk is set to max of 1024, we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT		1000
+
+/*
+ * Define here KERN_TRACE so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE		KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
+{
+	/* Probably should print a warning here. */
+	if (s->len >= 1000)
+		s->len = 1000;
+
+	/* should be zero ended, but we are paranoid. */
+	s->buffer[s->len] = 0;
+
+	printk(KERN_TRACE "%s", s->buffer);
+
+	trace_seq_reset(s);
+}
+
+
+void ftrace_dump(void)
+{
+	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	/* use static because iter can be a bit big for the stack */
+	static struct trace_iterator iter;
+	struct trace_array_cpu *data;
+	static cpumask_t mask;
+	static int dump_ran;
+	unsigned long flags;
+	int cnt = 0;
+	int cpu;
+
+	/* only one dump */
+	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	if (dump_ran)
+		goto out;
+
+	dump_ran = 1;
+
+	/* No turning back! */
+	ftrace_kill_atomic();
+
+	printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+	iter.tr = &global_trace;
+	iter.trace = current_trace;
+
+	/*
+	 * We need to stop all tracing on all CPUS to read the
+	 * the next buffer. This is a bit expensive, but is
+	 * not done often. We fill all what we can read,
+	 * and then release the locks again.
+	 */
+
+	cpus_clear(mask);
+
+	for_each_tracing_cpu(cpu) {
+		data = iter.tr->data[cpu];
+
+		if (!head_page(data) || !data->trace_idx)
+			continue;
+
+		atomic_inc(&data->disabled);
+		cpu_set(cpu, mask);
+	}
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		__raw_spin_lock(&data->lock);
+
+		if (data->overrun > iter.last_overrun[cpu])
+			iter.overrun[cpu] +=
+				data->overrun - iter.last_overrun[cpu];
+		iter.last_overrun[cpu] = data->overrun;
+	}
+
+	while (!trace_empty(&iter)) {
+
+		if (!cnt)
+			printk(KERN_TRACE "---------------------------------\n");
+
+		cnt++;
+
+		/* reset all but tr, trace, and overruns */
+		memset(&iter.seq, 0,
+		       sizeof(struct trace_iterator) -
+		       offsetof(struct trace_iterator, seq));
+		iter.iter_flags |= TRACE_FILE_LAT_FMT;
+		iter.pos = -1;
+
+		if (find_next_entry_inc(&iter) != NULL) {
+			print_trace_line(&iter);
+			trace_consume(&iter);
+		}
+
+		trace_printk_seq(&iter.seq);
+	}
+
+	if (!cnt)
+		printk(KERN_TRACE "   (ftrace buffer empty)\n");
+	else
+		printk(KERN_TRACE "---------------------------------\n");
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		__raw_spin_unlock(&data->lock);
+	}
+
+	for_each_cpu_mask(cpu, mask) {
+		data = iter.tr->data[cpu];
+		atomic_dec(&data->disabled);
+	}
+
+
+ out:
+	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+}
+
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
@@ -3338,6 +3506,11 @@ __init static int tracer_alloc_buffers(void)
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
 
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &trace_panic_notifier);
+
+	register_die_notifier(&trace_die_notifier);
+
 	return 0;
 
  free_buffers:
-- 
cgit v1.2.3-55-g7522


From 7b928c23fa3e9fa37d1d4ba52ba963f41ee5aae0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 15 Aug 2008 17:48:02 +0200
Subject: ftrace: build fix

fix:

 In file included from init/main.c:65:
 include/linux/ftrace.h:166: error: expected ‘,' or ‘;' before ‘{' token
 make[1]: *** [init/main.o] Error 1
 make: *** [init/main.o] Error 2

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7fb92045bf0..ce929cb55435 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -183,7 +183,10 @@ extern void ftrace_dump(void);
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
-ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)))
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+
+static inline int
+ftrace_printk(const char *fmt, ...)
 {
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From c5131ad6c3cbe8f6674993e29a76cecf8deb4384 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 15 Aug 2008 18:22:09 +0200
Subject: ftrace: ftrace_kill_atomic() build fix

fix:

 kernel/built-in.o: In function `ftrace_dump':
 (.text+0x2e2ea): undefined reference to `ftrace_kill_atomic'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ce929cb55435..36c439927ff1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -36,6 +36,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
+static inline void ftrace_kill_atomic(void) { }
 #endif /* CONFIG_FTRACE */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-- 
cgit v1.2.3-55-g7522


From 3989cce82b272bd6312555fcc47c11715c157102 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 9 Jun 2008 20:54:22 +0200
Subject: ftrace: scripts/recordmcount.pl cross-build hack

hack around:

 ld: Relocatable linking with relocations from format elf32-i386 (init/.tmp_gl_calibrate.o) to format elf64-x86-64 (init/.tmp_mx_calibrate.o) i  CC      arch/x86/mm/extable.o
 objcopy: 'init/.tmp_mx_calibrate.o': No such file
 rm: cannot remove `init/.tmp_mx_calibrate.o': No such file or directory
 ld: Relocatable linking with relocations from format elf32-i386 (arch/x86/mm/extable.o) to format elf64-x86-64 (arch/x86/mm/.tmp_mx_extable.o) is not supported
 mv: cannot stat `arch/x86/mm/.tmp_mx_extable.o': No such file or directory
 ld: Relocatable linking with relocations from format elf32-i386 (arch/x86/mm/fault.o) to format elf64-x86-64 (arch/x86/mm/.tmp_mx_fault.o) is not supported

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 44b4b23e91b2..e4922a6c963b 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -108,6 +108,20 @@ if ($#ARGV < 6) {
 
 my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
 
+if ($arch eq "i386") {
+  $ld = "ld -m elf_i386";
+  $objdump = "objdump -M i386";
+  $objcopy = "objcopy -O elf32-i386";
+  $cc = "gcc -m32";
+}
+
+if ($arch eq "x86_64") {
+  $ld = "ld -m elf_x86_64";
+  $objdump = "objdump -M x86-64";
+  $objcopy = "objcopy -O elf64-x86-64";
+  $cc = "gcc -m64";
+}
+
 $objdump = "objdump" if ((length $objdump) == 0);
 $objcopy = "objcopy" if ((length $objcopy) == 0);
 $cc = "gcc" if ((length $cc) == 0);
-- 
cgit v1.2.3-55-g7522


From 98a983aad2e5b3dc83a8a761675445cdd8f3e6bd Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Fri, 15 Aug 2008 21:08:22 +0200
Subject: ftrace: fix some mistakes in error messages

This patch fixes some mistakes on the tracer in warning messages when
debugfs fails to create tracing files.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: srostedt@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 97513c8ecd67..896e59f772c9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3030,12 +3030,12 @@ static __init void tracer_init_debugfs(void)
 	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
 				    &global_trace, &show_traces_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'available_tracers' entry\n");
 
 	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
 				    &global_trace, &set_tracer_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+		pr_warning("Could not create debugfs 'current_tracer' entry\n");
 
 	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
 				    &tracing_max_latency,
@@ -3048,7 +3048,7 @@ static __init void tracer_init_debugfs(void)
 				    &tracing_thresh, &tracing_max_lat_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'tracing_thresh' entry\n");
 	entry = debugfs_create_file("README", 0644, d_tracer,
 				    NULL, &tracing_readme_fops);
 	if (!entry)
@@ -3058,13 +3058,13 @@ static __init void tracer_init_debugfs(void)
 				    NULL, &tracing_pipe_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_pipe' entry\n");
 
 	entry = debugfs_create_file("trace_entries", 0644, d_tracer,
 				    &global_trace, &tracing_entries_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
-			   "'tracing_threash' entry\n");
+			   "'trace_entries' entry\n");
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-- 
cgit v1.2.3-55-g7522


From 00fd61aee10533e003f2f00ab7163207660a4051 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 15 Aug 2008 21:40:04 -0400
Subject: ftrace: do not init module on ftrace disabled

If one of the self tests of ftrace has disabled the function tracer,
do not run the code to convert the mcount calls in modules.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eadd0eaea9b6..11d94f2dc485 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -307,7 +307,7 @@ void ftrace_release(void *start, unsigned long size)
 	unsigned long e = s + size;
 	int i;
 
-	if (!start)
+	if (ftrace_disabled || !start)
 		return;
 
 	/* No interrupt should call this */
@@ -1567,7 +1567,7 @@ static int ftrace_convert_nops(unsigned long *start,
 
 void ftrace_init_module(unsigned long *start, unsigned long *end)
 {
-	if (start == end)
+	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(start, end);
 }
-- 
cgit v1.2.3-55-g7522


From 99ecdc43bc17faf5fa571db8569df171ecd0e5b8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 15 Aug 2008 21:40:05 -0400
Subject: ftrace: add necessary locking for ftrace records

The new design of pre-recorded mcounts and updating the code outside of
kstop_machine has changed the way the records themselves are protected.

This patch uses the ftrace_lock to protect the records. Note, the lock
still does not need to be taken within calls that are only called via
kstop_machine, since the that code can not run while the spin lock is held.

Also removed the hash_lock needed for the daemon when MCOUNT_RECORD is
configured. Also did a slight cleanup of an unused variable.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 11d94f2dc485..43665add9805 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	/* Should never be called by interrupts */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	struct ftrace_ops **p;
 	int ret = 0;
 
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	/*
@@ -153,6 +154,21 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the recording of the mcount
+ * callers are dynamic. That is, by the caller themselves and
+ * not recorded via the compilation.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) spin_lock_irqsave(ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
+#define ftrace_hash_unlock(flags) do { } while(0)
+#endif
+
 static struct task_struct *ftraced_task;
 
 enum {
@@ -171,7 +187,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
 
 static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
 
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
 static DEFINE_MUTEX(ftraced_lock);
 static DEFINE_MUTEX(ftrace_regex_lock);
 
@@ -310,7 +325,7 @@ void ftrace_release(void *start, unsigned long size)
 	if (ftrace_disabled || !start)
 		return;
 
-	/* No interrupt should call this */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
@@ -362,7 +377,6 @@ ftrace_record_ip(unsigned long ip)
 	unsigned long flags;
 	unsigned long key;
 	int resched;
-	int atomic;
 	int cpu;
 
 	if (!ftrace_enabled || ftrace_disabled)
@@ -392,9 +406,7 @@ ftrace_record_ip(unsigned long ip)
 	if (ftrace_ip_in_hash(ip, key))
 		goto out;
 
-	atomic = irqs_disabled();
-
-	spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+	ftrace_hash_lock(flags);
 
 	/* This ip may have hit the hash before the lock */
 	if (ftrace_ip_in_hash(ip, key))
@@ -411,7 +423,7 @@ ftrace_record_ip(unsigned long ip)
 	ftraced_trigger = 1;
 
  out_unlock:
-	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+	ftrace_hash_unlock(flags);
  out:
 	per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
 
@@ -887,6 +899,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
  retry:
 	if (iter->idx >= iter->pg->index) {
 		if (iter->pg->next) {
@@ -910,6 +924,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			goto retry;
 		}
 	}
+	spin_unlock(&ftrace_lock);
 
 	iter->pos = *pos;
 
@@ -1023,8 +1038,8 @@ static void ftrace_filter_reset(int enable)
 	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 	unsigned i;
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 0;
 	pg = ftrace_pages_start;
@@ -1037,7 +1052,7 @@ static void ftrace_filter_reset(int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static int
@@ -1149,8 +1164,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 	}
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 1;
 	pg = ftrace_pages_start;
@@ -1187,7 +1202,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static ssize_t
@@ -1551,6 +1566,7 @@ static int ftrace_convert_nops(unsigned long *start,
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
+		/* should not be called from interrupt context */
 		spin_lock(&ftrace_lock);
 		ftrace_record_ip(addr);
 		spin_unlock(&ftrace_lock);
-- 
cgit v1.2.3-55-g7522


From 3700273586ee6a58b95dd07d9f8a02db4a9b476f Mon Sep 17 00:00:00 2001
From: Huang Ying
Date: Mon, 18 Aug 2008 16:24:56 +0800
Subject: ftrace: fix incorrect comment style of __ftrace_enabled_save()

This patch fixes incorrect comment style of __ftrace_enabled_save().

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 36c439927ff1..8b4cf38c80d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -99,9 +99,11 @@ static inline void tracer_disable(void)
 #endif
 }
 
-/* Ftrace disable/restore without lock. Some synchronization mechanism
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
  * must be used to prevent ftrace_enabled to be changed between
- * disable/restore. */
+ * disable/restore.
+ */
 static inline int __ftrace_enabled_save(void)
 {
 #ifdef CONFIG_FTRACE
-- 
cgit v1.2.3-55-g7522


From 6a4917e3ae5194a10e0723f96edc854c381e3063 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Mon, 18 Aug 2008 15:58:12 -0700
Subject: ftrace: fix build problem with CONFIG_FTRACE

I'm seeing when I use separate src/build dirs:

make[3]: *** [arch/x86/kernel/time_32.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/irq_32.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/ldt.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory
make[3]: *** [arch/x86/kernel/i8259.o] Error 1
/bin/sh: scripts/recordmcount.pl: No such file or directory

This fixes it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/Makefile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 463ddcc583ed..232485ec5265 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,7 +199,7 @@ cmd_modversions =							\
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = scripts/recordmcount.pl "$(ARCH)" \
+cmd_record_mcount = $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
 endif
 
-- 
cgit v1.2.3-55-g7522


From d74fcd1e4e8842d5302cd303ef25cef7e67f68b4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 15 Aug 2008 11:40:24 -0400
Subject: ftrace: update recordmount.pl arch changes

I'm trying to keep all the arch changes in recordmcount.pl in one place.
I moved your code into that area, by adding the flags to the commands
that were passed in.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index e4922a6c963b..36c8355c555e 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -108,20 +108,6 @@ if ($#ARGV < 6) {
 
 my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
 
-if ($arch eq "i386") {
-  $ld = "ld -m elf_i386";
-  $objdump = "objdump -M i386";
-  $objcopy = "objcopy -O elf32-i386";
-  $cc = "gcc -m32";
-}
-
-if ($arch eq "x86_64") {
-  $ld = "ld -m elf_x86_64";
-  $objdump = "objdump -M x86-64";
-  $objcopy = "objcopy -O elf64-x86-64";
-  $cc = "gcc -m64";
-}
-
 $objdump = "objdump" if ((length $objdump) == 0);
 $objcopy = "objcopy" if ((length $objcopy) == 0);
 $cc = "gcc" if ((length $cc) == 0);
@@ -146,11 +132,25 @@ if ($arch eq "x86_64") {
     $function_regex = "<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
+
+    # force flags for this arch
+    $ld .= " -m elf_x86_64";
+    $objdump .= " -M x86-64";
+    $objcopy .= " -O elf64-x86-64";
+    $cc .= " -m64";
+
 } elsif ($arch eq "i386") {
     $section_regex = "Disassembly of section";
     $function_regex = "<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
     $type = ".long";
+
+    # force flags for this arch
+    $ld .= " -m elf_i386";
+    $objdump .= " -M i386";
+    $objcopy .= " -O elf32-i386";
+    $cc .= " -m32";
+
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
-- 
cgit v1.2.3-55-g7522


From 8feff1cacc29e9cfdc6d1ce5f2108db87b91046e Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 20 Aug 2008 10:07:35 -0400
Subject: ftrace: handle weak symbol functions

During tests and checks, I've discovered that there were failures to
convert mcount callers into nops. Looking deeper into these failures,
code that was attempted to be changed was not an mcount caller.
The current code only updates if the code being changed is what it expects,
but I still investigate any time there is a failure.

What was happening is that a weak symbol was being used as a reference
for other mcount callers. That weak symbol was also referenced elsewhere
so the offsets were using the strong symbol and not the function symbol
that it was referenced from.

This patch changes the setting up of the mcount_loc section to search
for a global function that is not weak. It will pick a local over a weak
but if only a weak is found in a section, a warning is printed and the
mcount location is not recorded (just to be safe).

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 106 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 86 insertions(+), 20 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 36c8355c555e..1891cf9743fc 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -119,17 +119,19 @@ $mv = "mv" if ((length $mv) == 0);
 #print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
 #    "'$nm' '$rm' '$mv' '$inputfile'\n";
 
-my %locals;
-my %convert;
+my %locals;		# List of local (static) functions
+my %weak;		# List of weak functions
+my %convert;		# List of local functions used that needs conversion
 
 my $type;
 my $section_regex;	# Find the start of a section
-my $function_regex;	# Find the name of a function (return func name)
+my $function_regex;	# Find the name of a function
+			#    (return offset and func name)
 my $mcount_regex;	# Find the call site to mcount (return offset)
 
 if ($arch eq "x86_64") {
     $section_regex = "Disassembly of section";
-    $function_regex = "<(.*?)>:";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
 
@@ -141,7 +143,7 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "i386") {
     $section_regex = "Disassembly of section";
-    $function_regex = "<(.*?)>:";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
     $type = ".long";
 
@@ -158,7 +160,6 @@ if ($arch eq "x86_64") {
 my $text_found = 0;
 my $read_function = 0;
 my $opened = 0;
-my $text = "";
 my $mcount_section = "__mcount_loc";
 
 my $dirname;
@@ -186,46 +187,111 @@ my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
 #
-# Step 1: find all the local symbols (static functions).
+# Step 1: find all the local (static functions) and weak symbols.
+#        't' is local, 'w/W' is weak (we never use a weak function)
 #
 open (IN, "$nm $inputfile|") || die "error running $nm";
 while (<IN>) {
     if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
 	$locals{$1} = 1;
+    } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
+	$weak{$2} = $1;
     }
 }
 close(IN);
 
+my @offsets;		# Array of offsets of mcount callers
+my $ref_func;		# reference function to use for offsets
+my $offset = 0;		# offset of ref_func to section beginning
+
+##
+# update_funcs - print out the current mcount callers
+#
+#  Go through the list of offsets to callers and write them to
+#  the output file in a format that can be read by an assembler.
+#
+sub update_funcs
+{
+    return if ($#offsets < 0);
+
+    defined($ref_func) || die "No function to reference";
+
+    # A section only had a weak function, to represent it.
+    # Unfortunately, a weak function may be overwritten by another
+    # function of the same name, making all these offsets incorrect.
+    # To be safe, we simply print a warning and bail.
+    if (defined $weak{$ref_func}) {
+	print STDERR
+	    "$inputfile: WARNING: referencing weak function" .
+	    " $ref_func for mcount\n";
+	return;
+    }
+
+    # is this function static? If so, note this fact.
+    if (defined $locals{$ref_func}) {
+	$convert{$ref_func} = 1;
+    }
+
+    # Loop through all the mcount caller offsets and print a reference
+    # to the caller based from the ref_func.
+    for (my $i=0; $i <= $#offsets; $i++) {
+	if (!$opened) {
+	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+	    $opened = 1;
+	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+	}
+	printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
+    }
+}
+
 #
 # Step 2: find the sections and mcount call sites
 #
 open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
 
+my $text;
+
 while (<IN>) {
     # is it a section?
     if (/$section_regex/) {
 	$read_function = 1;
+	# print out any recorded offsets
+	update_funcs() if ($text_found);
+
+	# reset all markers and arrays
 	$text_found = 0;
+	undef($ref_func);
+	undef(@offsets);
+
     # section found, now is this a start of a function?
     } elsif ($read_function && /$function_regex/) {
-	$read_function = 0;
 	$text_found = 1;
-	$text = $1;
-	# is this function static? If so, note this fact.
-	if (defined $locals{$text}) {
-	    $convert{$text} = 1;
+	$offset = hex $1;
+	$text = $2;
+
+	# if this is either a local function or a weak function
+	# keep looking for functions that are global that
+	# we can use safely.
+	if (!defined($locals{$text}) && !defined($weak{$text})) {
+	    $ref_func = $text;
+	    $read_function = 0;
+	} else {
+	    # if we already have a function, and this is weak, skip it
+	    if (!defined($ref_func) || !defined($weak{$text})) {
+		$ref_func = $text;
+	    }
 	}
-    # is this a call site to mcount? If so, print the offset from the section
-    } elsif ($text_found && /$mcount_regex/) {
-	if (!$opened) {
-	    open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
-	    $opened = 1;
-	    print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
-	}
-	print FILE "\t$type $text + 0x$1\n";
+    }
+
+    # is this a call site to mcount? If so, record it to print later
+    if ($text_found && /$mcount_regex/) {
+	$offsets[$#offsets + 1] = hex $1;
     }
 }
 
+# dump out anymore offsets that may have been found
+update_funcs() if ($text_found);
+
 # If we did not find any mcount callers, we are done (do nothing).
 if (!$opened) {
     exit(0);
-- 
cgit v1.2.3-55-g7522


From 6f93fc076a464bfe24e8d4c5fea3f6ca5bdb264d Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 20 Aug 2008 12:55:07 -0400
Subject: ftrace: x86 use copy to and from user functions

The modification of code is performed either by kstop_machine, before
SMP starts, or on module code before the module is executed. There is
no reason to do the modifications from assembly. The copy to and from
user functions are sufficient and produces cleaner and easier to read
code.

Thanks to Benjamin Herrenschmidt for suggesting the idea.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4151c91254e8..082d99642622 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,6 +11,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
@@ -60,11 +61,7 @@ notrace int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
-	unsigned replaced;
-	unsigned old = *(unsigned *)old_code; /* 4 bytes */
-	unsigned new = *(unsigned *)new_code; /* 4 bytes */
-	unsigned char newch = new_code[4];
-	int faulted = 0;
+	unsigned char replaced[MCOUNT_INSN_SIZE];
 
 	/*
 	 * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 *  as well as code changing.
 	 *
 	 * No real locking needed, this code is run through
-	 * kstop_machine.
+	 * kstop_machine, or before SMP starts.
 	 */
-	asm volatile (
-		"1: lock\n"
-		"   cmpxchg %3, (%2)\n"
-		"   jnz 2f\n"
-		"   movb %b4, 4(%2)\n"
-		"2:\n"
-		".section .fixup, \"ax\"\n"
-		"3:	movl $1, %0\n"
-		"	jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: "=r"(faulted), "=a"(replaced)
-		: "r"(ip), "r"(new), "c"(newch),
-		  "0"(faulted), "a"(old)
-		: "memory");
-	sync_core();
+	if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+		return 1;
 
-	if (replaced != old && replaced != new)
-		faulted = 2;
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return 2;
 
-	return faulted;
+	WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code,
+				    MCOUNT_INSN_SIZE));
+
+	sync_core();
+
+	return 0;
 }
 
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
-- 
cgit v1.2.3-55-g7522


From 2d7da80f7138c4276ef4fa0334be400b805d0fbf Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Mon, 25 Aug 2008 13:08:44 +1000
Subject: ftrace: fix build failure

After disabling FTRACE_MCOUNT_RECORD via a patch, a dormant build
failure surfaced:

 kernel/trace/ftrace.c: In function 'ftrace_record_ip':
 kernel/trace/ftrace.c:416: error: incompatible type for argument 1 of '_spin_lock_irqsave'
 kernel/trace/ftrace.c:433: error: incompatible type for argument 1 of '_spin_lock_irqsave'

Introduced by commit 6dad8e07f4c10b17b038e84d29f3ca41c2e55cd0 ("ftrace:
add necessary locking for ftrace records").

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 43665add9805..7599abdf6d4d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -161,8 +161,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
  * not recorded via the compilation.
  */
 static DEFINE_SPINLOCK(ftrace_hash_lock);
-#define ftrace_hash_lock(flags)	  spin_lock_irqsave(ftrace_hash_lock, flags)
-#define ftrace_hash_unlock(flags) spin_lock_irqsave(ftrace_hash_lock, flags)
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
 #define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
-- 
cgit v1.2.3-55-g7522


From ac8825ec6d941b6899331b84c7d6bf027c3bb4f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 25 Aug 2008 08:12:04 +0200
Subject: ftrace: clean up macro usage

enclose the argument in parenthesis. (especially since we cast it,
which is a high prio operation)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7599abdf6d4d..969a83f75a3e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -165,7 +165,7 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
-#define ftrace_hash_lock(flags)   do { (void)flags; } while (0)
+#define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
 #define ftrace_hash_unlock(flags) do { } while(0)
 #endif
 
-- 
cgit v1.2.3-55-g7522


From f2f8458e751f9ae41dfec3c00a46d3e62dc38f60 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 25 Aug 2008 14:52:11 -0400
Subject: ftrace: objcopy version test for local symbols

The --globalize-symbols option came out in objcopy version 2.17.
If the kernel is being compiled on a system with a lower version of
objcopy, then we can not use the globalize / localize trick to
link to symbols pointing to local functions.

This patch tests the version of objcopy and will only use the trick
if the version is greater than or equal to 2.17. Otherwise, if an
object has only local functions within a section, it will give a
nice warning and recommend the user to upgrade their objcopy.

Leaving the symbols unrecorded is not that big of a deal, since the
mcount record method changes the actual mcount code to be a simple
"ret" without recording registers or anything.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 1891cf9743fc..ee9e12676776 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -186,6 +186,36 @@ if ($filename =~ m,^(.*)(\.\S),) {
 my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
+#
+# --globalize-symbols came out in 2.17, we must test the version
+# of objcopy, and if it is less than 2.17, then we can not
+# record local functions.
+my $use_locals = 01;
+my $local_warn_once = 0;
+my $found_version = 0;
+
+open (IN, "$objcopy --version |") || die "error running $objcopy";
+while (<IN>) {
+    if (/objcopy.*\s(\d+)\.(\d+)/) {
+	my $major = $1;
+	my $minor = $2;
+
+	$found_version = 1;
+	if ($major < 2 ||
+	    ($major == 2 && $minor < 17)) {
+	    $use_locals = 0;
+	}
+	last;
+    }
+}
+close (IN);
+
+if (!$found_version) {
+    print STDERR "WARNING: could not find objcopy version.\n" .
+	"\tDisabling local function references.\n";
+}
+
+
 #
 # Step 1: find all the local (static functions) and weak symbols.
 #        't' is local, 'w/W' is weak (we never use a weak function)
@@ -229,6 +259,17 @@ sub update_funcs
 
     # is this function static? If so, note this fact.
     if (defined $locals{$ref_func}) {
+
+	# only use locals if objcopy supports globalize-symbols
+	if (!$use_locals) {
+	    print STDERR
+		"$inputfile: WARNING: referencing local function " .
+		"$ref_func for mcount\n" .
+		"\tConsider upgrading objcopy to support the globalize-" .
+		"symbols option.\n"
+		if (!$local_warn_once++);
+	    return;
+	}
 	$convert{$ref_func} = 1;
     }
 
-- 
cgit v1.2.3-55-g7522


From b3a320417484a6d6b9d28098944df58341353992 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 27 Aug 2008 09:08:30 +0200
Subject: kbuild: ftrace: don't assume that scripts/recordmcount.pl is
 executable

CHK     include/linux/version.h
  CHK     include/linux/utsrelease.h
  CC      scripts/mod/empty.o
/bin/sh: /usr/src/25/scripts/recordmcount.pl: Permission denied

We shouldn't assume that files have their `x' bits set.  There are various
ways in which file permissions get lost, including use of patch(1).

It might not be correct to assume that perl lives in $PATH?

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/Makefile.build | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 232485ec5265..5ed4cbf1e0e1 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,8 +199,9 @@ cmd_modversions =							\
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
-	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
+	"$(ARCH)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" \
+	"$(MV)" "$(@)";
 endif
 
 define rule_cc_o_c
-- 
cgit v1.2.3-55-g7522


From e5a81b629ea8feb9e7530cfac35cfb41c45facf3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 27 Aug 2008 23:31:01 -0400
Subject: ftrace: add stack tracer

This is another tracer using the ftrace infrastructure, that examines
at each function call the size of the stack. If the stack use is greater
than the previous max it is recorded.

You can always see (and set) the max stack size seen. By setting it
to zero will start the recording again. The backtrace is also available.

For example:

# cat /debug/tracing/stack_max_size
1856

# cat /debug/tracing/stack_trace
[<c027764d>] stack_trace_call+0x8f/0x101
[<c021b966>] ftrace_call+0x5/0x8
[<c02553cc>] clocksource_get_next+0x12/0x48
[<c02542a5>] update_wall_time+0x538/0x6d1
[<c0245913>] do_timer+0x23/0xb0
[<c0257657>] tick_do_update_jiffies64+0xd9/0xf1
[<c02576b9>] tick_sched_timer+0x4a/0xad
[<c0250fe6>] __run_hrtimer+0x3e/0x75
[<c02518ed>] hrtimer_interrupt+0xf1/0x154
[<c022c870>] smp_apic_timer_interrupt+0x71/0x84
[<c021b7e9>] apic_timer_interrupt+0x2d/0x34
[<c0238597>] finish_task_switch+0x29/0xa0
[<c05abd13>] schedule+0x765/0x7be
[<c05abfca>] schedule_timeout+0x1b/0x90
[<c05ab4d4>] wait_for_common+0xab/0x101
[<c05ab5ac>] wait_for_completion+0x12/0x14
[<c033cfc3>] blk_execute_rq+0x84/0x99
[<c0402470>] scsi_execute+0xc2/0x105
[<c040250a>] scsi_execute_req+0x57/0x7f
[<c043afe0>] sr_test_unit_ready+0x3e/0x97
[<c043bbd6>] sr_media_change+0x43/0x205
[<c046b59f>] media_changed+0x48/0x77
[<c046b5ff>] cdrom_media_changed+0x31/0x37
[<c043b091>] sr_block_media_changed+0x16/0x18
[<c02b9e69>] check_disk_change+0x1b/0x63
[<c046f4c3>] cdrom_open+0x7a1/0x806
[<c043b148>] sr_block_open+0x78/0x8d
[<c02ba4c0>] do_open+0x90/0x257
[<c02ba869>] blkdev_open+0x2d/0x56
[<c0296a1f>] __dentry_open+0x14d/0x23c
[<c0296b32>] nameidata_to_filp+0x24/0x38
[<c02a1c68>] do_filp_open+0x347/0x626
[<c02967ef>] do_sys_open+0x47/0xbc
[<c02968b0>] sys_open+0x23/0x2b
[<c021aadd>] sysenter_do_call+0x12/0x26

I've tested this on both x86_64 and i386.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig       |   9 ++
 kernel/trace/Makefile      |   1 +
 kernel/trace/trace_stack.c | 254 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 kernel/trace/trace_stack.c

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 14d9505178ca..2a22e46390d3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -106,6 +106,15 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FTRACE
+	select FTRACE
+	select STACKTRACE
+	help
+	  This tracer records the max stack of the kernel, and displays
+	  it in debugfs/tracing/stack_trace
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..58ec61c44bd6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..4d1e522e3fe8
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES] =
+	{ [0 ... (STACK_TRACE_ENTRIES-1)] = ULONG_MAX };
+static struct stack_trace max_stack_trace = {
+	.max_entries		= STACK_TRACE_ENTRIES,
+	.entries		= stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+	unsigned long this_size;
+	unsigned long flags;
+
+	this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+	this_size = THREAD_SIZE - this_size;
+
+	if (this_size <= max_stack_size)
+		return;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+
+	/* a race could have already updated it */
+	if (this_size <= max_stack_size)
+		goto out;
+
+	max_stack_size = this_size;
+
+	max_stack_trace.nr_entries	= 0;
+	max_stack_trace.skip		= 1;
+
+	save_stack_trace(&max_stack_trace);
+
+ out:
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu, resched;
+
+	if (unlikely(!ftrace_enabled || stack_trace_disabled))
+		return;
+
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	cpu = raw_smp_processor_id();
+	/* no atomic needed, we only modify this variable by this cpu */
+	if (per_cpu(trace_active, cpu)++ != 0)
+		goto out;
+
+	check_stack();
+
+ out:
+	per_cpu(trace_active, cpu)--;
+	/* prevent recursion in schedule */
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+	.func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+		    size_t count, loff_t *ppos)
+{
+	unsigned long *ptr = filp->private_data;
+	char buf[64];
+	int r;
+
+	r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+	if (r > sizeof(buf))
+		r = sizeof(buf);
+	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+		     size_t count, loff_t *ppos)
+{
+	long *ptr = filp->private_data;
+	unsigned long val, flags;
+	char buf[64];
+	int ret;
+
+	if (count >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&max_stack_lock);
+	*ptr = val;
+	__raw_spin_unlock(&max_stack_lock);
+	raw_local_irq_restore(flags);
+
+	return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+	.open		= tracing_open_generic,
+	.read		= stack_max_size_read,
+	.write		= stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	unsigned long *t = m->private;
+
+	(*pos)++;
+
+	if (!t || *t == ULONG_MAX)
+		return NULL;
+
+	t++;
+	m->private = t;
+
+	return t;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+	unsigned long *t = m->private;
+	loff_t l = 0;
+
+	local_irq_disable();
+	__raw_spin_lock(&max_stack_lock);
+
+	for (; t && l < *pos; t = t_next(m, t, &l))
+		;
+
+	return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+	__raw_spin_unlock(&max_stack_lock);
+	local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
+{
+#ifdef CONFIG_KALLSYMS
+	char str[KSYM_SYMBOL_LEN];
+
+	sprint_symbol(str, addr);
+
+	return seq_printf(m, "[<%p>] %s\n", (void*)addr, str);
+#else
+	return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+	unsigned long *t = v;
+
+	if (!t || *t == ULONG_MAX)
+		return 0;
+
+	trace_lookup_stack(m, *t);
+
+	return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &stack_trace_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = stack_dump_trace;
+	}
+
+	return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+	.open		= stack_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+				    &max_stack_size, &stack_max_size_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+				    NULL, &stack_trace_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+	register_ftrace_function(&trace_ops);
+
+	return 0;
+}
+
+device_initcall(stack_trace_init);
-- 
cgit v1.2.3-55-g7522


From 3b47bfc1fca01cccad9cce2d18b79b18ef2e4131 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 27 Aug 2008 23:24:15 -0400
Subject: ftrace: remove direct reference to mcount in trace code

The mcount record method of ftrace scans objdump for references to mcount.
Using mcount as the reference to test if the calls to mcount being replaced
are indeed calls to mcount, this use of mcount was also caught as a
location to change. Using a variable that points to the mcount address
moves this reference into the data section that is not scanned, and
we do not use a false location to try and modify.

The warn on code was what was used to detect this bug.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 969a83f75a3e..b69966f0f144 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,6 +36,14 @@
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -577,7 +585,7 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 	ip = rec->ip;
 
 	nop = ftrace_nop_replace();
-	call = ftrace_call_replace(ip, MCOUNT_ADDR);
+	call = ftrace_call_replace(ip, mcount_addr);
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
-- 
cgit v1.2.3-55-g7522


From d53475b5aa946752e3306b2ecb5a8c9c51cf8dd0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 27 Aug 2008 13:02:01 -0400
Subject: ftrace: remove warning of old objcopy and local functions

The warning messages about old objcopy and local functions spam the
user quite drastically.  Remove the warning until we can find a nicer
way of tell the user to upgrade their objcopy.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/recordmcount.pl | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index ee9e12676776..f56d760bd589 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -262,12 +262,6 @@ sub update_funcs
 
 	# only use locals if objcopy supports globalize-symbols
 	if (!$use_locals) {
-	    print STDERR
-		"$inputfile: WARNING: referencing local function " .
-		"$ref_func for mcount\n" .
-		"\tConsider upgrading objcopy to support the globalize-" .
-		"symbols option.\n"
-		if (!$local_warn_once++);
 	    return;
 	}
 	$convert{$ref_func} = 1;
-- 
cgit v1.2.3-55-g7522


From 1b6cced6ec9677fa65471e890dfdcb4bf5387643 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 29 Aug 2008 16:51:43 -0400
Subject: ftrace: stack trace add indexes

This patch adds indexes into the stack that the functions in the
stack dump were found at. As an added bonus, I also added a diff
to show which function is the most notorious consumer of the stack.

The output now looks like this:

# cat /debug/tracing/stack_trace
        Depth   Size      Location    (48 entries)
        -----   ----      --------
  0)     2476     212   blk_recount_segments+0x39/0x59
  1)     2264      12   bio_phys_segments+0x16/0x1d
  2)     2252      20   blk_rq_bio_prep+0x23/0xaf
  3)     2232      12   init_request_from_bio+0x74/0x77
  4)     2220      56   __make_request+0x294/0x331
  5)     2164     136   generic_make_request+0x34f/0x37d
  6)     2028      56   submit_bio+0xe7/0xef
  7)     1972      28   submit_bh+0xd1/0xf0
  8)     1944     112   block_read_full_page+0x299/0x2a9
  9)     1832       8   blkdev_readpage+0x14/0x16
 10)     1824      28   read_cache_page_async+0x7e/0x109
 11)     1796      16   read_cache_page+0x11/0x49
 12)     1780      32   read_dev_sector+0x3c/0x72
 13)     1748      48   read_lba+0x4d/0xaa
 14)     1700     168   efi_partition+0x85/0x61b
 15)     1532      72   rescan_partitions+0x10e/0x266
 16)     1460      40   do_open+0x1c7/0x24e
 17)     1420     292   __blkdev_get+0x79/0x84
 18)     1128      12   blkdev_get+0x12/0x14
 19)     1116      20   register_disk+0xd1/0x11e
 20)     1096      28   add_disk+0x34/0x90
 21)     1068      52   sd_probe+0x2b1/0x366
 22)     1016      20   driver_probe_device+0xa5/0x120
 23)      996       8   __device_attach+0xd/0xf
 24)      988      32   bus_for_each_drv+0x3e/0x68
 25)      956      24   device_attach+0x56/0x6c
 26)      932      16   bus_attach_device+0x26/0x4d
 27)      916      64   device_add+0x380/0x4b4
 28)      852      28   scsi_sysfs_add_sdev+0xa1/0x1c9
 29)      824     160   scsi_probe_and_add_lun+0x919/0xa2a
 30)      664      36   __scsi_add_device+0x88/0xae
 31)      628      44   ata_scsi_scan_host+0x9e/0x21c
 32)      584      28   ata_host_register+0x1cb/0x1db
 33)      556      24   ata_host_activate+0x98/0xb5
 34)      532     192   ahci_init_one+0x9bd/0x9e9
 35)      340      20   pci_device_probe+0x3e/0x5e
 36)      320      20   driver_probe_device+0xa5/0x120
 37)      300      20   __driver_attach+0x3f/0x5e
 38)      280      36   bus_for_each_dev+0x40/0x62
 39)      244      12   driver_attach+0x19/0x1b
 40)      232      28   bus_add_driver+0x9c/0x1af
 41)      204      28   driver_register+0x76/0xd2
 42)      176      20   __pci_register_driver+0x44/0x71
 43)      156       8   ahci_init+0x14/0x16
 44)      148     100   _stext+0x42/0x122
 45)       48      20   kernel_init+0x175/0x1dc
 46)       28      28   kernel_thread_helper+0x7/0x10

The first column is simply an index starting from the inner most function
and counting down to the outer most.

The next column is the location that the function was found on the stack.

The next column is the size of the stack for that function.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_stack.c | 90 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 73 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4d1e522e3fe8..74c5d9a3afae 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -16,8 +16,10 @@
 
 #define STACK_TRACE_ENTRIES 500
 
-static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES] =
-	{ [0 ... (STACK_TRACE_ENTRIES-1)] = ULONG_MAX };
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
 static struct stack_trace max_stack_trace = {
 	.max_entries		= STACK_TRACE_ENTRIES,
 	.entries		= stack_dump_trace,
@@ -32,8 +34,9 @@ static DEFINE_PER_CPU(int, trace_active);
 
 static inline void check_stack(void)
 {
-	unsigned long this_size;
-	unsigned long flags;
+	unsigned long this_size, flags;
+	unsigned long *p, *top, *start;
+	int i;
 
 	this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
 	this_size = THREAD_SIZE - this_size;
@@ -51,10 +54,42 @@ static inline void check_stack(void)
 	max_stack_size = this_size;
 
 	max_stack_trace.nr_entries	= 0;
-	max_stack_trace.skip		= 1;
+	max_stack_trace.skip		= 3;
 
 	save_stack_trace(&max_stack_trace);
 
+	/*
+	 * Now find where in the stack these are.
+	 */
+	i = 0;
+	start = &this_size;
+	top = (unsigned long *)
+		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+	/*
+	 * Loop through all the entries. One of the entries may
+	 * for some reason be missed on the stack, so we may
+	 * have to account for them. If they are all there, this
+	 * loop will only happen once. This code only takes place
+	 * on a new max, so it is far from a fast path.
+	 */
+	while (i < max_stack_trace.nr_entries) {
+
+		stack_dump_index[i] = this_size;
+		p = start;
+
+		for (; p < top && i < max_stack_trace.nr_entries; p++) {
+			if (*p == stack_dump_trace[i]) {
+				this_size = stack_dump_index[i++] =
+					(top - p) * sizeof(unsigned long);
+				/* Start the search from here */
+				start = p + 1;
+			}
+		}
+
+		i++;
+	}
+
  out:
 	__raw_spin_unlock(&max_stack_lock);
 	raw_local_irq_restore(flags);
@@ -145,22 +180,24 @@ static struct file_operations stack_max_size_fops = {
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	unsigned long *t = m->private;
+	long i = (long)m->private;
 
 	(*pos)++;
 
-	if (!t || *t == ULONG_MAX)
+	i++;
+
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
 		return NULL;
 
-	t++;
-	m->private = t;
+	m->private = (void *)i;
 
-	return t;
+	return &m->private;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	unsigned long *t = m->private;
+	void *t = &m->private;
 	loff_t l = 0;
 
 	local_irq_disable();
@@ -178,14 +215,15 @@ static void t_stop(struct seq_file *m, void *p)
 	local_irq_enable();
 }
 
-static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
+static int trace_lookup_stack(struct seq_file *m, long i)
 {
+	unsigned long addr = stack_dump_trace[i];
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
 
 	sprint_symbol(str, addr);
 
-	return seq_printf(m, "[<%p>] %s\n", (void*)addr, str);
+	return seq_printf(m, "%s\n", str);
 #else
 	return seq_printf(m, "%p\n", (void*)addr);
 #endif
@@ -193,12 +231,30 @@ static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
 
 static int t_show(struct seq_file *m, void *v)
 {
-	unsigned long *t = v;
+	long i = *(long *)v;
+	int size;
+
+	if (i < 0) {
+		seq_printf(m, "        Depth   Size      Location"
+			   "    (%d entries)\n"
+			   "        -----   ----      --------\n",
+			   max_stack_trace.nr_entries);
+		return 0;
+	}
 
-	if (!t || *t == ULONG_MAX)
+	if (i >= max_stack_trace.nr_entries ||
+	    stack_dump_trace[i] == ULONG_MAX)
 		return 0;
 
-	trace_lookup_stack(m, *t);
+	if (i+1 == max_stack_trace.nr_entries ||
+	    stack_dump_trace[i+1] == ULONG_MAX)
+		size = stack_dump_index[i];
+	else
+		size = stack_dump_index[i] - stack_dump_index[i+1];
+
+	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);
+
+	trace_lookup_stack(m, i);
 
 	return 0;
 }
@@ -217,7 +273,7 @@ static int stack_trace_open(struct inode *inode, struct file *file)
 	ret = seq_open(file, &stack_trace_seq_ops);
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		m->private = stack_dump_trace;
+		m->private = (void *)-1;
 	}
 
 	return ret;
-- 
cgit v1.2.3-55-g7522


From 2ff01c6a17391225a18256d510b6e5b4aba40aa1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 4 Sep 2008 15:04:37 +0200
Subject: stack tracer: depends on DEBUG_KERNEL

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a22e46390d3..5a9cffb0fafb 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -109,6 +109,7 @@ config CONTEXT_SWITCH_TRACER
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FTRACE
 	select STACKTRACE
 	help
-- 
cgit v1.2.3-55-g7522


From a6168353d1c0b24b7512e14d1c3e47ed69881a23 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 20 Aug 2008 16:36:11 -0700
Subject: ftrace: make output nicely spaced for up to 999 cpus

Currently some of the ftrace output goes skewiff if you have more
than 9 cpus, and some if you have more than 99.

Twiddle with the headers and format strings to make up to 999 cpus
display without causing spacing problems.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 896e59f772c9..1801900908e1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1409,21 +1409,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 
 static void print_lat_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#                _------=> CPU#            \n");
-	seq_puts(m, "#               / _-----=> irqs-off        \n");
-	seq_puts(m, "#              | / _----=> need-resched    \n");
-	seq_puts(m, "#              || / _---=> hardirq/softirq \n");
-	seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
-	seq_puts(m, "#              |||| /                      \n");
-	seq_puts(m, "#              |||||     delay             \n");
-	seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
-	seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
+	seq_puts(m, "#                  _------=> CPU#            \n");
+	seq_puts(m, "#                 / _-----=> irqs-off        \n");
+	seq_puts(m, "#                | / _----=> need-resched    \n");
+	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
+	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
+	seq_puts(m, "#                |||| /                      \n");
+	seq_puts(m, "#                |||||     delay             \n");
+	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
+	seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
 }
 
 static void print_func_help_header(struct seq_file *m)
 {
-	seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
-	seq_puts(m, "#              | |      |          |         |\n");
+	seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
+	seq_puts(m, "#              | |       |          |         |\n");
 }
 
 
@@ -1508,7 +1508,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 	comm = trace_find_cmdline(field->pid);
 
 	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
-	trace_seq_printf(s, "%d", cpu);
+	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
 			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
 			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1598,7 +1598,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 	if (verbose) {
 		comm = trace_find_cmdline(field->pid);
-		trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+		trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
 				 field->pid, cpu, field->flags,
@@ -1694,7 +1694,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
 	if (!ret)
 		return 0;
-	ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
-- 
cgit v1.2.3-55-g7522


From 652567aa2000f1d4a1fd434382a30d8dd4a7c980 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 3 Sep 2008 17:42:50 -0400
Subject: ftrace: binary and not logical for continue test

Peter Zijlstra provided me with a nice brown paper bag while letting me know
that I was doing a logical AND and not a binary one, making a condition
true more often than it should be.

Luckily, a false true is handled by the calling function and no harm is
done. But this needs to be fixed regardless.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1801900908e1..9639e45f0860 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1655,7 +1655,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	case TRACE_PRINT:
 		seq_print_ip_sym(s, field->print.ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	default:
@@ -1768,7 +1768,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		seq_print_ip_sym(s, field->print.ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
@@ -1833,7 +1833,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		trace_seq_printf(s, "# %lx %s",
 				 field->print.ip, field->print.buf);
-		if (field->flags && TRACE_FLAG_CONT)
+		if (field->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
-- 
cgit v1.2.3-55-g7522


From 5a90f577e5369a84b720ead42e621fcb1b8a8b21 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 3 Sep 2008 17:42:51 -0400
Subject: ftrace: print continue index fix

An item in the trace buffer that is bigger than one entry may be split
up using the TRACE_CONT entry. This makes it a virtual single entry.
The current code increments the iterator index even while traversing
TRACE_CONT entries, making it look like the iterator is further than
it actually is.

This patch adds code to not increment the iterator index while skipping
over TRACE_CONT entries.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9639e45f0860..d24101cfc425 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1117,9 +1117,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 /* Increment the index counter of an iterator by one */
-static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
-	iter->idx++;
 	iter->next_idx[cpu]++;
 	iter->next_page_idx[cpu]++;
 
@@ -1132,6 +1131,12 @@ static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 	}
 }
 
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+{
+	iter->idx++;
+	__trace_iterator_increment(iter, cpu);
+}
+
 static struct trace_entry *
 trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
 		 struct trace_iterator *iter, int cpu)
@@ -1153,7 +1158,7 @@ trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
 
 	/* find a real entry */
 	do {
-		trace_iterator_increment(iter, cpu);
+		__trace_iterator_increment(iter, cpu);
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
 	} while (ent && ent->type != TRACE_CONT);
 
@@ -1187,7 +1192,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
 				ent = trace_entry_next(tr, data, iter, cpu);
 			else {
 				while (ent && ent->type == TRACE_CONT) {
-					trace_iterator_increment(iter, cpu);
+					__trace_iterator_increment(iter, cpu);
 					ent = trace_entry_idx(tr, tr->data[cpu],
 							      iter, cpu);
 				}
@@ -1566,7 +1571,7 @@ trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 
 	do {
 		trace_seq_printf(s, "%s", ent->cont.buf);
-		trace_iterator_increment(iter, iter->cpu);
+		__trace_iterator_increment(iter, iter->cpu);
 		ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	} while (ent && ent->type == TRACE_CONT);
 }
-- 
cgit v1.2.3-55-g7522


From f09ce573f57ddc35c67b39e51f34545877b30031 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 4 Sep 2008 10:24:14 +0200
Subject: ftrace: make ftrace_printk usable with the other tracers

Currently ftrace_printk only works with the ftrace tracer, switch it to an
iter_ctrl setting so we can make us of them with other tracers too.

[rostedt@redhat.com: tweak to the disable condition]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 8 +++++---
 kernel/trace/trace.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d24101cfc425..8a00d6c5c0f5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,6 +235,7 @@ static const char *trace_options[] = {
 	"block",
 	"stacktrace",
 	"sched-tree",
+	"ftrace_printk",
 	NULL
 };
 
@@ -3091,9 +3092,10 @@ static __init void tracer_init_debugfs(void)
 
 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
-	struct trace_array *tr = &global_trace;
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
+
+	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct trace_entry *entry;
 	unsigned long flags;
@@ -3101,7 +3103,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	va_list ap;
 	int cpu, len = 0, write, written = 0;
 
-	if (likely(!ftrace_function_enabled))
+	if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3109,7 +3111,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
-	if (unlikely(disabled != 1 || !ftrace_function_enabled))
+	if (unlikely(disabled != 1))
 		goto out;
 
 	spin_lock(&trace_buf_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 50b6d7a6f01a..5f54c875c8be 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -356,6 +356,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_BLOCK		= 0x80,
 	TRACE_ITER_STACKTRACE		= 0x100,
 	TRACE_ITER_SCHED_TREE		= 0x200,
+	TRACE_ITER_PRINTK		= 0x400,
 };
 
 #endif /* _LINUX_KERNEL_TRACE_H */
-- 
cgit v1.2.3-55-g7522


From 80b5e940050c273ba277acbf3a9fbc1d4441e681 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 4 Sep 2008 10:24:16 +0200
Subject: ftrace: sched_switch: show the wakee's cpu

While profiling the smp behaviour of the scheduler it was needed to know to
which cpu a task got woken.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 12 +++++++++---
 kernel/trace/trace.h |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a00d6c5c0f5..7e6cb4fe62f2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -977,6 +977,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->field.ctx.next_pid	= next->pid;
 	entry->field.ctx.next_prio	= next->prio;
 	entry->field.ctx.next_state	= next->state;
+	entry->field.ctx.next_cpu	= task_cpu(next);
 	__trace_stack(tr, data, flags, 5);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1003,6 +1004,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->field.ctx.next_pid	= wakee->pid;
 	entry->field.ctx.next_prio	= wakee->prio;
 	entry->field.ctx.next_state	= wakee->state;
+	entry->field.ctx.next_cpu	= task_cpu(wakee);
 	__trace_stack(tr, data, flags, 6);
 	__raw_spin_unlock(&data->lock);
 	raw_local_irq_restore(irq_flags);
@@ -1636,10 +1638,11 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 			__ffs(field->ctx.prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
 		comm = trace_find_cmdline(field->ctx.next_pid);
-		trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
+		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
 				 field->ctx.prev_pid,
 				 field->ctx.prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
+				 field->ctx.next_cpu,
 				 field->ctx.next_pid,
 				 field->ctx.next_prio,
 				 T, comm);
@@ -1736,11 +1739,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 			state_to_char[field->ctx.prev_state] : 'X';
 		T = field->ctx.next_state < sizeof(state_to_char) ?
 			state_to_char[field->ctx.next_state] : 'X';
-		ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
+		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
 				       field->ctx.prev_pid,
 				       field->ctx.prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
+				       field->ctx.next_cpu,
 				       field->ctx.next_pid,
 				       field->ctx.next_prio,
 				       T);
@@ -1817,10 +1821,11 @@ static int print_raw_fmt(struct trace_iterator *iter)
 			state_to_char[field->ctx.next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
+		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
 				       field->ctx.prev_pid,
 				       field->ctx.prev_prio,
 				       S,
+				       field->ctx.next_cpu,
 				       field->ctx.next_pid,
 				       field->ctx.next_prio,
 				       T);
@@ -1893,6 +1898,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
+		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
 		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5f54c875c8be..77c265f6a779 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -41,6 +41,7 @@ struct ctx_switch_entry {
 	unsigned int		next_pid;
 	unsigned char		next_prio;
 	unsigned char		next_state;
+	unsigned int		next_cpu;
 };
 
 /*
-- 
cgit v1.2.3-55-g7522


From d3ee6d992821f471193a7ee7a00af9ebb4bf5d01 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 4 Sep 2008 14:04:51 +0200
Subject: ftrace: make it depend on DEBUG_KERNEL

make most of the tracers depend on DEBUG_KERNEL - that's their intended
purpose. (most distributions have DEBUG_KERNEL enabled anyway so this is
not a practical limitation - but it simplifies the tracing menu in the
normal case)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5a9cffb0fafb..16e5bb5daaa5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -22,6 +22,7 @@ config TRACING
 config FTRACE
 	bool "Kernel Function Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -40,6 +41,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -63,6 +65,7 @@ config PREEMPT_TRACER
 	depends on GENERIC_TIME
 	depends on PREEMPT
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -90,6 +93,7 @@ config SYSPROF_TRACER
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -100,6 +104,7 @@ config SCHED_TRACER
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
@@ -120,6 +125,7 @@ config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
          This option will modify all the calls to ftrace dynamically
@@ -146,6 +152,7 @@ config FTRACE_SELFTEST
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
 	depends on TRACING
+	depends on DEBUG_KERNEL
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
-- 
cgit v1.2.3-55-g7522


From c0719e5a4b1ccc04180b7a7b71095c9fb7131919 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Sat, 6 Sep 2008 01:06:03 -0400
Subject: ftrace: use ftrace_release for all dynamic ftrace functions

ftrace_release is necessary for all uses of dynamic ftrace and not just
the archs that have CONFIG_FTRACE_MCOUNT_RECORD defined.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8b4cf38c80d2..5de9903645d5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -77,8 +77,10 @@ extern void mcount_call(void);
 
 extern int skip_trace(unsigned long ip);
 
-void ftrace_disable_daemon(void);
-void ftrace_enable_daemon(void);
+extern void ftrace_release(void *start, unsigned long size);
+
+extern void ftrace_disable_daemon(void);
+extern void ftrace_enable_daemon(void);
 
 #else
 # define skip_trace(ip)				({ 0; })
@@ -86,6 +88,7 @@ void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)	do { } while (0)
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
@@ -199,12 +202,10 @@ static inline void ftrace_dump(void) { }
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 extern void ftrace_init_module(unsigned long *start, unsigned long *end);
-extern void ftrace_release(void *start, unsigned long size);
 #else
 static inline void ftrace_init(void) { }
 static inline void
 ftrace_init_module(unsigned long *start, unsigned long *end) { }
-static inline void ftrace_release(void *start, unsigned long size) { }
 #endif
 
 
-- 
cgit v1.2.3-55-g7522


From 644f991d4b920ab1f5043509651479420b293490 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Sat, 6 Sep 2008 01:06:04 -0400
Subject: ftrace: fix unlocking of hash

This must be brown paper bag week for Steven Rostedt!

While working on ftrace for PPC, I discovered that the hash locking done
when CONFIG_FTRACE_MCOUNT_RECORD is not set, is totally incorrect.

With a cut and paste error, I had the hash lock macro to lock for both
hash_lock _and_ hash_unlock!

This bug did not affect x86 since this bug was introduced when
CONFIG_FTRACE_MCOUNT_RECORD was added to x86.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b69966f0f144..c9e09d86e1d8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -170,7 +170,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
  */
 static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
-#define ftrace_hash_unlock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+			spin_unlock_irqrestore(&ftrace_hash_lock, flags)
 #else
 /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
 #define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
-- 
cgit v1.2.3-55-g7522


From bbe5c7830c6dbde58726d44ec0337bc8b2d95d37 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 21:54:16 +0300
Subject: x86 mmiotrace: fix a rare memory leak

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 635b50e85581..754bd1eaf4f6 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -307,8 +307,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
 	map.map_id = trace->id;
 
 	spin_lock_irq(&trace_lock);
-	if (!is_enabled())
+	if (!is_enabled()) {
+		kfree(trace);
 		goto not_enabled;
+	}
 
 	mmio_trace_mapping(&map);
 	list_add_tail(&trace->list, &trace_list);
-- 
cgit v1.2.3-55-g7522


From 45dcd8b8a8ca855591e3ac882d3a7fc255d09d43 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 21:56:41 +0300
Subject: ftrace: move mmiotrace functions out of trace.c

Moves the mmiotrace specific functions from trace.c to
trace_mmiotrace.c. Functions trace_wake_up(), tracing_get_trace_entry(),
and tracing_generic_entry_update() are therefore made available outside
trace.c.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 46 ++----------------------------------------
 kernel/trace/trace.h           | 15 ++++++--------
 kernel/trace/trace_mmiotrace.c | 42 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7e6cb4fe62f2..d372bc535963 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -785,7 +785,7 @@ trace_next_page(struct trace_array_cpu *data, void *addr)
 	return page_address(page);
 }
 
-static inline struct trace_entry *
+struct trace_entry *
 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 {
 	unsigned long idx, idx_next;
@@ -821,7 +821,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 	return entry;
 }
 
-static inline void
+void
 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
 	struct task_struct *tsk = current;
@@ -865,48 +865,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 		trace_function(tr, data, ip, parent_ip, flags);
 }
 
-#ifdef CONFIG_MMIOTRACE
-void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_rw *rw)
-{
-	struct trace_entry *entry;
-	unsigned long irq_flags;
-
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_RW;
-	entry->field.mmiorw		= *rw;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
-
-	trace_wake_up();
-}
-
-void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
-						struct mmiotrace_map *map)
-{
-	struct trace_entry *entry;
-	unsigned long irq_flags;
-
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_MAP;
-	entry->field.mmiomap		= *map;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
-
-	trace_wake_up();
-}
-#endif
-
 void __trace_stack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long flags,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 77c265f6a779..9d39aa00a9c6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -213,11 +213,17 @@ struct trace_iterator {
 	long			idx;
 };
 
+void trace_wake_up(void);
 void tracing_reset(struct trace_array_cpu *data);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+						struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+						unsigned long flags);
+
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
@@ -291,15 +297,6 @@ extern unsigned long ftrace_update_tot_cnt;
 extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
-#ifdef CONFIG_MMIOTRACE
-extern void __trace_mmiotrace_rw(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_rw *rw);
-extern void __trace_mmiotrace_map(struct trace_array *tr,
-				struct trace_array_cpu *data,
-				struct mmiotrace_map *map);
-#endif
-
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 #ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 9b7a936f4b1f..ef02747b26d9 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -276,6 +276,27 @@ __init static int init_mmio_trace(void)
 }
 device_initcall(init_mmio_trace);
 
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_rw *rw)
+{
+	struct trace_entry *entry;
+	unsigned long irq_flags;
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type			= TRACE_MMIO_RW;
+	entry->field.mmiorw		= *rw;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_rw(struct mmiotrace_rw *rw)
 {
 	struct trace_array *tr = mmio_trace_array;
@@ -283,6 +304,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
 	__trace_mmiotrace_rw(tr, data, rw);
 }
 
+static void __trace_mmiotrace_map(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct mmiotrace_map *map)
+{
+	struct trace_entry *entry;
+	unsigned long irq_flags;
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry				= tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type			= TRACE_MMIO_MAP;
+	entry->field.mmiomap		= *map;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+
+	trace_wake_up();
+}
+
 void mmio_trace_mapping(struct mmiotrace_map *map)
 {
 	struct trace_array *tr = mmio_trace_array;
-- 
cgit v1.2.3-55-g7522


From 801fe40001dfc263848552fb28924b766ed44ea4 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 21:58:24 +0300
Subject: ftrace: add trace_vprintk()

trace_vprintk() for easier implementation of tracer specific *_printk
functions. Add check check for no_tracer, and implement
__ftrace_printk() as a wrapper.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 24 ++++++++++++++++++------
 kernel/trace/trace.h |  1 +
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d372bc535963..406de9cf2820 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3054,7 +3054,7 @@ static __init void tracer_init_debugfs(void)
 	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
 #define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
 
-int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
@@ -3064,10 +3064,9 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 	struct trace_entry *entry;
 	unsigned long flags;
 	long disabled;
-	va_list ap;
 	int cpu, len = 0, write, written = 0;
 
-	if (!(trace_flags & TRACE_ITER_PRINTK) || !tr->ctrl || tracing_disabled)
+	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3079,9 +3078,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 		goto out;
 
 	spin_lock(&trace_buf_lock);
-	va_start(ap, fmt);
-	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, ap);
-	va_end(ap);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
@@ -3120,6 +3117,21 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(trace_vprintk);
+
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
+	va_start(ap, fmt);
+	ret = trace_vprintk(ip, fmt, ap);
+	va_end(ap);
+	return ret;
+}
 EXPORT_SYMBOL_GPL(__ftrace_printk);
 
 static int trace_panic_handler(struct notifier_block *this,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9d39aa00a9c6..be3b3cf95f4b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -333,6 +333,7 @@ extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
+extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
-- 
cgit v1.2.3-55-g7522


From 9e57fb35d711331a9b1410c5c56ebeb3733428a0 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 22:00:34 +0300
Subject: x86 mmiotrace: implement mmiotrace_printk()

Offer mmiotrace users a function to inject markers from inside the kernel.
This depends on the trace_vprintk() patch.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c         | 19 ++++++++++++++++++-
 arch/x86/mm/testmmiotrace.c    |  4 ++++
 include/linux/mmiotrace.h      | 17 +++++++++++++++--
 kernel/trace/trace_mmiotrace.c |  5 +++++
 4 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 754bd1eaf4f6..5e2e2e72ee80 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -75,7 +75,7 @@ static LIST_HEAD(trace_list);		/* struct remap_trace */
  *   and trace_lock.
  * - Routines depending on is_enabled() must take trace_lock.
  * - trace_list users must hold trace_lock.
- * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
  * - pre/post callbacks assume the effect of is_enabled() being true.
  */
 
@@ -379,6 +379,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
 		iounmap_trace_core(addr);
 }
 
+int mmiotrace_printk(const char *fmt, ...)
+{
+	int ret = 0;
+	va_list args;
+	unsigned long flags;
+	va_start(args, fmt);
+
+	spin_lock_irqsave(&trace_lock, flags);
+	if (is_enabled())
+		ret = mmio_trace_printk(fmt, args);
+	spin_unlock_irqrestore(&trace_lock, flags);
+
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
 static void clear_trace_list(void)
 {
 	struct remap_trace *trace;
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index d877c5b423ef..ab50a8d7402c 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -3,6 +3,7 @@
  */
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/mmiotrace.h>
 
 #define MODULE_NAME "testmmiotrace"
 
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
 static void do_write_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Write test.\n");
 	for (i = 0; i < 256; i++)
 		iowrite8(i, p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
 static void do_read_test(void __iomem *p)
 {
 	unsigned int i;
+	mmiotrace_printk("Read test.\n");
 	for (i = 0; i < 256; i++)
 		ioread8(p + i);
 	for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
 		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
 		return;
 	}
+	mmiotrace_printk("ioremap returned %p.\n", p);
 	do_write_test(p);
 	do_read_test(p);
 	iounmap(p);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 61d19e1b7a0b..60cc3bf5c538 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p);
 /* Called from page fault handler. */
 extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
 
-/* Called from ioremap.c */
 #ifdef CONFIG_MMIOTRACE
+/* Called from ioremap.c */
 extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
 							void __iomem *addr);
 extern void mmiotrace_iounmap(volatile void __iomem *addr);
+
+/* For anyone to insert markers. Remember trailing newline. */
+extern int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 2)));
 #else
 static inline void mmiotrace_ioremap(resource_size_t offset,
 					unsigned long size, void __iomem *addr)
@@ -48,7 +52,15 @@ static inline void mmiotrace_ioremap(resource_size_t offset,
 static inline void mmiotrace_iounmap(volatile void __iomem *addr)
 {
 }
-#endif /* CONFIG_MMIOTRACE_HOOKS */
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+				__attribute__ ((format (printf, 1, 0)));
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+{
+	return 0;
+}
+#endif /* CONFIG_MMIOTRACE */
 
 enum mm_io_opcode {
 	MMIO_READ = 0x1,     /* struct mmiotrace_rw */
@@ -81,5 +93,6 @@ extern void enable_mmiotrace(void);
 extern void disable_mmiotrace(void);
 extern void mmio_trace_rw(struct mmiotrace_rw *rw);
 extern void mmio_trace_mapping(struct mmiotrace_map *map);
+extern int mmio_trace_printk(const char *fmt, va_list args);
 
 #endif /* MMIOTRACE_H */
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index ef02747b26d9..767d1faf56e5 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -335,3 +335,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
 	__trace_mmiotrace_map(tr, data, map);
 	preempt_enable();
 }
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+	return trace_vprintk(0, fmt, args);
+}
-- 
cgit v1.2.3-55-g7522


From fc5e27ae4b45a0619701a83f30d9b7fad7ed9400 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 22:02:27 +0300
Subject: mmiotrace: handle TRACE_PRINT entries

Also make trace_seq_print_cont() non-static, and add a newline if the
seq buffer can't hold all data.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 31 +++++++++++--------------------
 kernel/trace/trace.h           | 19 +++++++++++++++++++
 kernel/trace/trace_mmiotrace.c | 23 +++++++++++++++++++++++
 3 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 406de9cf2820..7e7154f77009 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -199,23 +199,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
 	return nsecs / 1000;
 }
 
-/*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- *  IRQS_OFF	- interrupts were disabled
- *  NEED_RESCED - reschedule is requested
- *  HARDIRQ	- inside an interrupt handler
- *  SOFTIRQ	- inside a softirq handler
- *  CONT	- multiple entries hold the trace item
- */
-enum trace_flag_type {
-	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_NEED_RESCHED		= 0x02,
-	TRACE_FLAG_HARDIRQ		= 0x04,
-	TRACE_FLAG_SOFTIRQ		= 0x08,
-	TRACE_FLAG_CONT			= 0x10,
-};
-
 /*
  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
  * control the output of kernel symbols.
@@ -1517,12 +1500,16 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static void
-trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[iter->cpu];
 	struct trace_entry *ent;
+	bool ok = true;
 
 	ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	if (!ent || ent->type != TRACE_CONT) {
@@ -1531,10 +1518,14 @@ trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	}
 
 	do {
-		trace_seq_printf(s, "%s", ent->cont.buf);
+		if (ok)
+			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
 		__trace_iterator_increment(iter, iter->cpu);
 		ent = trace_entry_idx(tr, data, iter, iter->cpu);
 	} while (ent && ent->type == TRACE_CONT);
+
+	if (!ok)
+		trace_seq_putc(s, '\n');
 }
 
 static int
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index be3b3cf95f4b..648433d18ccb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -71,6 +71,23 @@ struct print_entry {
 	char			buf[];
 };
 
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ *  IRQS_OFF	- interrupts were disabled
+ *  NEED_RESCED - reschedule is requested
+ *  HARDIRQ	- inside an interrupt handler
+ *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
+ */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_NEED_RESCHED		= 0x02,
+	TRACE_FLAG_HARDIRQ		= 0x04,
+	TRACE_FLAG_SOFTIRQ		= 0x08,
+	TRACE_FLAG_CONT			= 0x10,
+};
+
 /*
  * The trace field - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
@@ -330,6 +347,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
 
 extern void *head_page(struct trace_array_cpu *data);
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+				 struct trace_iterator *iter);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 767d1faf56e5..a108c326f36e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -245,6 +245,27 @@ static int mmio_print_map(struct trace_iterator *iter)
 	return 0;
 }
 
+static int mmio_print_mark(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	const char *msg		= entry->field.print.buf;
+	struct trace_seq *s	= &iter->seq;
+	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned secs		= (unsigned long)t;
+	int ret;
+
+	/* The trailing newline must be in the message. */
+	ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+	if (!ret)
+		return 0;
+
+	if (entry->field.flags & TRACE_FLAG_CONT)
+		trace_seq_print_cont(s, iter);
+
+	return 1;
+}
+
 /* return 0 to abort printing without consuming current entry in pipe mode */
 static int mmio_print_line(struct trace_iterator *iter)
 {
@@ -253,6 +274,8 @@ static int mmio_print_line(struct trace_iterator *iter)
 		return mmio_print_rw(iter);
 	case TRACE_MMIO_MAP:
 		return mmio_print_map(iter);
+	case TRACE_PRINT:
+		return mmio_print_mark(iter);
 	default:
 		return 1; /* ignore unknown entries */
 	}
-- 
cgit v1.2.3-55-g7522


From 4427414170a63331a9cc36b9598502c5cdfe453b Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 22:03:56 +0300
Subject: mmiotrace: remove left-over marker cruft

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/mmio-mod.c    | 64 -----------------------------------------------
 include/linux/mmiotrace.h |  3 +--
 2 files changed, 1 insertion(+), 66 deletions(-)

diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 5e2e2e72ee80..2c4baa88f2cb 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -56,13 +56,6 @@ struct remap_trace {
 static DEFINE_PER_CPU(struct trap_reason, pf_reason);
 static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
 
-#if 0 /* XXX: no way gather this info anymore */
-/* Access to this is not per-cpu. */
-static DEFINE_PER_CPU(atomic_t, dropped);
-#endif
-
-static struct dentry *marker_file;
-
 static DEFINE_MUTEX(mmiotrace_mutex);
 static DEFINE_SPINLOCK(trace_lock);
 static atomic_t mmiotrace_enabled;
@@ -97,44 +90,6 @@ static bool is_enabled(void)
 	return atomic_read(&mmiotrace_enabled);
 }
 
-#if 0 /* XXX: needs rewrite */
-/*
- * Write callback for the debugfs entry:
- * Read a marker and write it to the mmio trace log
- */
-static ssize_t write_marker(struct file *file, const char __user *buffer,
-						size_t count, loff_t *ppos)
-{
-	char *event = NULL;
-	struct mm_io_header *headp;
-	ssize_t len = (count > 65535) ? 65535 : count;
-
-	event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
-	if (!event)
-		return -ENOMEM;
-
-	headp = (struct mm_io_header *)event;
-	headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
-	headp->data_len = len;
-
-	if (copy_from_user(event + sizeof(*headp), buffer, len)) {
-		kfree(event);
-		return -EFAULT;
-	}
-
-	spin_lock_irq(&trace_lock);
-#if 0 /* XXX: convert this to use tracing */
-	if (is_enabled())
-		relay_write(chan, event, sizeof(*headp) + len);
-	else
-#endif
-		len = -EINVAL;
-	spin_unlock_irq(&trace_lock);
-	kfree(event);
-	return len;
-}
-#endif
-
 static void print_pte(unsigned long address)
 {
 	unsigned int level;
@@ -481,26 +436,12 @@ static void leave_uniprocessor(void)
 }
 #endif
 
-#if 0 /* XXX: out of order */
-static struct file_operations fops_marker = {
-	.owner =	THIS_MODULE,
-	.write =	write_marker
-};
-#endif
-
 void enable_mmiotrace(void)
 {
 	mutex_lock(&mmiotrace_mutex);
 	if (is_enabled())
 		goto out;
 
-#if 0 /* XXX: tracing does not support text entries */
-	marker_file = debugfs_create_file("marker", 0660, dir, NULL,
-								&fops_marker);
-	if (!marker_file)
-		pr_err(NAME "marker file creation failed.\n");
-#endif
-
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
 	enter_uniprocessor();
@@ -525,11 +466,6 @@ void disable_mmiotrace(void)
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
-	if (marker_file) {
-		debugfs_remove(marker_file);
-		marker_file = NULL;
-	}
-
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 60cc3bf5c538..139d7c88d9c9 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -67,8 +67,7 @@ enum mm_io_opcode {
 	MMIO_WRITE = 0x2,    /* struct mmiotrace_rw */
 	MMIO_PROBE = 0x3,    /* struct mmiotrace_map */
 	MMIO_UNPROBE = 0x4,  /* struct mmiotrace_map */
-	MMIO_MARKER = 0x5,   /* raw char data */
-	MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
+	MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
 };
 
 struct mmiotrace_rw {
-- 
cgit v1.2.3-55-g7522


From 5bf9a1ee350a10feb94107de32a203d81fbbe706 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Tue, 16 Sep 2008 22:06:42 +0300
Subject: ftrace: inject markers via trace_marker file

Allow a user to inject a marker (TRACE_PRINT entry) into the trace ring
buffer. The related file operations are derived from code by Frédéric
Weisbecker <fweisbec@gmail.com>.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/tracers/mmiotrace.txt |  5 +--
 kernel/trace/trace.c                | 76 ++++++++++++++++++++++++++++++++++---
 kernel/trace/trace.h                |  4 ++
 3 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45b..5bbbe2096223 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7e7154f77009..eee1fd964898 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2879,6 +2879,66 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int tracing_open_mark(struct inode *inode, struct file *filp)
+{
+	int ret;
+
+	ret = tracing_open_generic(inode, filp);
+	if (ret)
+		return ret;
+
+	if (current_trace == &no_tracer)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int mark_printk(const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+	va_start(args, fmt);
+	ret = trace_vprintk(0, fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+					size_t cnt, loff_t *fpos)
+{
+	char *buf;
+	char *end;
+	struct trace_array *tr = &global_trace;
+
+	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+		return -EINVAL;
+
+	if (cnt > TRACE_BUF_SIZE)
+		cnt = TRACE_BUF_SIZE;
+
+	buf = kmalloc(cnt + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, ubuf, cnt)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	/* Cut from the first nil or newline. */
+	buf[cnt] = '\0';
+	end = strchr(buf, '\n');
+	if (end)
+		*end = '\0';
+
+	cnt = mark_printk("%s\n", buf);
+	kfree(buf);
+	*fpos += cnt;
+
+	return cnt;
+}
+
 static struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -2910,6 +2970,11 @@ static struct file_operations tracing_entries_fops = {
 	.write		= tracing_entries_write,
 };
 
+static struct file_operations tracing_mark_fops = {
+	.open		= tracing_open_mark,
+	.write		= tracing_mark_write,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static ssize_t
@@ -3027,6 +3092,12 @@ static __init void tracer_init_debugfs(void)
 		pr_warning("Could not create debugfs "
 			   "'trace_entries' entry\n");
 
+	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+				    NULL, &tracing_mark_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'trace_marker' entry\n");
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 				    &ftrace_update_tot_cnt,
@@ -3040,11 +3111,6 @@ static __init void tracer_init_debugfs(void)
 #endif
 }
 
-#define TRACE_BUF_SIZE 1024
-#define TRACE_PRINT_BUF_SIZE \
-	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
-#define TRACE_CONT_BUF_SIZE sizeof(struct trace_field)
-
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	static DEFINE_SPINLOCK(trace_buf_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 648433d18ccb..42f65d0097f0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -124,6 +124,10 @@ struct trace_entry {
 };
 
 #define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
+#define TRACE_BUF_SIZE		1024
+#define TRACE_PRINT_BUF_SIZE \
+	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
+#define TRACE_CONT_BUF_SIZE	sizeof(struct trace_field)
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
-- 
cgit v1.2.3-55-g7522


From fb1b6d8b5154c692172a424e45fbd0573295cb93 Mon Sep 17 00:00:00 2001
From: Steven Noonan
Date: Fri, 19 Sep 2008 03:06:43 -0700
Subject: ftrace: add nop tracer

A no-op tracer which can serve two purposes:

 1. A template for development of a new tracer.
 2. A convenient way to see ftrace_printk() calls without
    an irrelevant trace making the output messy.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig          | 10 +++++++
 kernel/trace/Makefile         |  1 +
 kernel/trace/trace.h          |  4 +++
 kernel/trace/trace_nop.c      | 65 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_selftest.c |  9 ++++++
 5 files changed, 89 insertions(+)
 create mode 100644 kernel/trace/trace_nop.c

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 16e5bb5daaa5..d7b2de744631 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -101,6 +101,16 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
+config NOP_TRACER
+	bool "NOP Tracer"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer does nothing. The primary purpose for it is to
+	  politely print the output of ftrace_printk() calls without
+	  the overhead of an irrelevant trace taking place.
+
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 58ec61c44bd6..73ba13f5a461 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 42f65d0097f0..447d4b9b6391 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -339,6 +339,10 @@ extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
 					 struct trace_array *tr);
 #endif
+#ifdef CONFIG_NOP_TRACER
+extern int trace_selftest_startup_nop(struct tracer *trace,
+					 struct trace_array *tr);
+#endif
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..dafaefb84038
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,65 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array	*ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+	/* Nothing to do! */
+}
+
+static void nop_trace_init(struct trace_array *tr)
+{
+	ctx_trace = tr;
+
+	if (tr->ctrl)
+		start_nop_trace(tr);
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		stop_nop_trace(tr);
+}
+
+static void nop_trace_ctrl_update(struct trace_array *tr)
+{
+	/* When starting a new trace, reset the buffers */
+	if (tr->ctrl)
+		start_nop_trace(tr);
+	else
+		stop_nop_trace(tr);
+}
+
+static struct tracer nop_trace __read_mostly =
+{
+	.name		= "nop",
+	.init		= nop_trace_init,
+	.reset		= nop_trace_reset,
+	.ctrl_update	= nop_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_nop,
+#endif
+};
+
+__init static int init_nop_trace(void)
+{
+	return register_tracer(&nop_trace);
+}
+device_initcall(init_nop_trace);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 630715bbd572..82db9103b9bc 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -418,6 +418,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
 }
 #endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
 
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+	/* What could possibly go wrong? */
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SCHED_TRACER
 static int trace_wakeup_test_thread(void *data)
 {
-- 
cgit v1.2.3-55-g7522


From 71c67d58b5660f8e42c7d4c3e77cbc03fac5ed31 Mon Sep 17 00:00:00 2001
From: Steven Noonan
Date: Sat, 20 Sep 2008 01:00:37 -0700
Subject: ftrace: mcount_addr defined but not used

When CONFIG_DYNAMIC_FTRACE isn't used, neither is mcount_addr. This
patch eliminates that warning.

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c9e09d86e1d8..7694f3e59797 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,14 +36,6 @@
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
-/*
- * Since MCOUNT_ADDR may point to mcount itself, we do not want
- * to get it confused by reading a reference in the code as we
- * are parsing on objcopy output of text. Use a variable for
- * it instead.
- */
-static unsigned long mcount_addr = MCOUNT_ADDR;
-
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -178,6 +170,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
 #define ftrace_hash_unlock(flags) do { } while(0)
 #endif
 
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 static struct task_struct *ftraced_task;
 
 enum {
-- 
cgit v1.2.3-55-g7522


From 8925b394eca0bb0a444a6487d702d0f650e94a10 Mon Sep 17 00:00:00 2001
From: Steven Noonan
Date: Sat, 20 Sep 2008 01:00:38 -0700
Subject: trace: remove pointless ifdefs

The functions are already 'extern' anyway, so there's no problem
with linkage. Removing these ifdefs also helps find any potential
compiler errors.

Suggested by Andrew Morton.

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 447d4b9b6391..c8c687088b4d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -319,38 +319,22 @@ extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
-#ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
 					   struct trace_array *tr);
-#endif
-#ifdef CONFIG_IRQSOFF_TRACER
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
 					  struct trace_array *tr);
-#endif
-#ifdef CONFIG_PREEMPT_TRACER
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
 					     struct trace_array *tr);
-#endif
-#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
 						 struct trace_array *tr);
-#endif
-#ifdef CONFIG_SCHED_TRACER
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
 					 struct trace_array *tr);
-#endif
-#ifdef CONFIG_NOP_TRACER
 extern int trace_selftest_startup_nop(struct tracer *trace,
 					 struct trace_array *tr);
-#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
-#ifdef CONFIG_SYSPROF_TRACER
 extern int trace_selftest_startup_sysprof(struct tracer *trace,
 					       struct trace_array *tr);
-#endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-- 
cgit v1.2.3-55-g7522


From 35cb5ed01261f5669657d2f1720aca902299887d Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Sun, 21 Sep 2008 20:10:14 +0200
Subject: tracing/ftrace: make nop tracer reset previous entries

If nop tracer is selected, some old entries from the previous tracer
could still be enqueued. Tracing have to be reset.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_nop.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index dafaefb84038..9fb02c17ad0c 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -26,8 +26,12 @@ static void stop_nop_trace(struct trace_array *tr)
 
 static void nop_trace_init(struct trace_array *tr)
 {
+	int cpu;
 	ctx_trace = tr;
 
+	for_each_online_cpu(cpu)
+		tracing_reset(tr->data[cpu]);
+
 	if (tr->ctrl)
 		start_nop_trace(tr);
 }
-- 
cgit v1.2.3-55-g7522


From 2a3a4f669df2164288d11406d11d5e4933bf5e53 Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Sun, 21 Sep 2008 20:12:14 +0200
Subject: tracing/ftrace: tracing engine depends on Nop Tracer

Now that the nop tracer is used as the default tracer by
replacing the "none" tracer, tracing engine depends on it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d7b2de744631..254328dec672 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,8 +1,13 @@
 #
 # Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
 #
+
+config NOP_TRACER
+	bool
+
 config HAVE_FTRACE
 	bool
+	select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
 	bool
@@ -101,16 +106,6 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config NOP_TRACER
-	bool "NOP Tracer"
-	depends on HAVE_FTRACE
-	depends on DEBUG_KERNEL
-	select TRACING
-	help
-	  This tracer does nothing. The primary purpose for it is to
-	  politely print the output of ftrace_printk() calls without
-	  the overhead of an irrelevant trace taking place.
-
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
-- 
cgit v1.2.3-55-g7522


From 43a15386c4faf913f7d70a47748c266d6210cd6e Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Sun, 21 Sep 2008 20:16:30 +0200
Subject: tracing/ftrace: replace none tracer by nop tracer

Replace "none" tracer by the recently created "nop" tracer.
Both are pretty similar except that nop accepts TRACE_PRINT
or TRACE_SPECIAL entries.

And as a consequence, changing the size of the ring buffer now
requires that tracing has already been disabled.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c     | 49 +++++++++---------------------------------------
 kernel/trace/trace.h     |  2 ++
 kernel/trace/trace_nop.c |  7 +------
 3 files changed, 12 insertions(+), 46 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index eee1fd964898..f2ef72fa3f17 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -142,24 +142,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds iter_ctrl options */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
 
-static notrace void no_trace_init(struct trace_array *tr)
-{
-	int cpu;
-
-	ftrace_function_enabled = 0;
-	if(tr->ctrl)
-		for_each_online_cpu(cpu)
-			tracing_reset(tr->data[cpu]);
-	tracer_enabled = 0;
-}
-
-/* dummy trace to disable tracing */
-static struct tracer no_tracer __read_mostly = {
-	.name		= "none",
-	.init		= no_trace_init
-};
-
-
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
@@ -962,7 +944,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 	long disabled;
 	int cpu;
 
-	if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+	if (tracing_disabled || !tr->ctrl)
 		return;
 
 	local_irq_save(flags);
@@ -2795,6 +2777,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	unsigned long val;
 	char buf[64];
 	int i, ret;
+	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
@@ -2814,9 +2797,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
 	mutex_lock(&trace_types_lock);
 
-	if (current_trace != &no_tracer) {
+	if (tr->ctrl) {
 		cnt = -EBUSY;
-		pr_info("ftrace: set current_tracer to none"
+		pr_info("ftrace: please disable tracing"
 			" before modifying buffer size\n");
 		goto out;
 	}
@@ -2879,20 +2862,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-static int tracing_open_mark(struct inode *inode, struct file *filp)
-{
-	int ret;
-
-	ret = tracing_open_generic(inode, filp);
-	if (ret)
-		return ret;
-
-	if (current_trace == &no_tracer)
-		return -ENODEV;
-
-	return 0;
-}
-
 static int mark_printk(const char *fmt, ...)
 {
 	int ret;
@@ -2911,7 +2880,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	char *end;
 	struct trace_array *tr = &global_trace;
 
-	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+	if (!tr->ctrl || tracing_disabled)
 		return -EINVAL;
 
 	if (cnt > TRACE_BUF_SIZE)
@@ -2971,7 +2940,7 @@ static struct file_operations tracing_entries_fops = {
 };
 
 static struct file_operations tracing_mark_fops = {
-	.open		= tracing_open_mark,
+	.open		= tracing_open_generic,
 	.write		= tracing_mark_write,
 };
 
@@ -3123,7 +3092,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	long disabled;
 	int cpu, len = 0, write, written = 0;
 
-	if (current_trace == &no_tracer || !tr->ctrl || tracing_disabled)
+	if (!tr->ctrl || tracing_disabled)
 		return 0;
 
 	local_irq_save(flags);
@@ -3539,8 +3508,8 @@ __init static int tracer_alloc_buffers(void)
 
 	trace_init_cmdlines();
 
-	register_tracer(&no_tracer);
-	current_trace = &no_tracer;
+	register_tracer(&nop_trace);
+	current_trace = &nop_trace;
 
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c8c687088b4d..cb2c3fb7dd54 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -369,4 +369,6 @@ enum trace_iterator_flags {
 	TRACE_ITER_PRINTK		= 0x400,
 };
 
+extern struct tracer nop_trace;
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9fb02c17ad0c..16c9ba060bab 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -51,7 +51,7 @@ static void nop_trace_ctrl_update(struct trace_array *tr)
 		stop_nop_trace(tr);
 }
 
-static struct tracer nop_trace __read_mostly =
+struct tracer nop_trace __read_mostly =
 {
 	.name		= "nop",
 	.init		= nop_trace_init,
@@ -62,8 +62,3 @@ static struct tracer nop_trace __read_mostly =
 #endif
 };
 
-__init static int init_nop_trace(void)
-{
-	return register_tracer(&nop_trace);
-}
-device_initcall(init_nop_trace);
-- 
cgit v1.2.3-55-g7522


From 05736a427f7e16be948ccbf39782bd3a6ae16b14 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 22 Sep 2008 14:55:47 -0700
Subject: ftrace: warn on failure to disable mcount callers

With the recent updates to ftrace, there should not be any failures when
modifying the code. If there is, then we need to warn about it.

This patch has a cleaned up version of the code that I used to discover
that the weak symbols were causing failures.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7694f3e59797..4dda4f60a2a9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -576,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
 	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+	int i;
+
+	printk(KERN_CONT "%s", fmt);
+
+	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
 static int
 ftrace_code_disable(struct dyn_ftrace *rec)
 {
@@ -590,6 +600,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
+		switch (failed) {
+		case 1:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on modifying ");
+			print_ip_sym(ip);
+			break;
+		case 2:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace failed to modify ");
+			print_ip_sym(ip);
+			print_ip_ins(" expected: ", call);
+			print_ip_ins(" actual: ", (unsigned char *)ip);
+			print_ip_ins(" replace: ", nop);
+			printk(KERN_CONT "\n");
+			break;
+		}
+
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
-- 
cgit v1.2.3-55-g7522


From 37a52f5ef120b93734bb2461744512b55695f69c Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Tue, 23 Sep 2008 15:05:46 -0700
Subject: x86: suppress trivial sparse signedness warnings

Could just as easily change the three casts to cast to the correct
type...this patch changes the type of ftrace_nop instead.

Supresses sparse warnings:

 arch/x86/kernel/ftrace.c:157:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:157:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:157:14:    got unsigned long *<noident>
 arch/x86/kernel/ftrace.c:161:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:161:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:161:14:    got unsigned long *<noident>
 arch/x86/kernel/ftrace.c:165:14: warning: incorrect type in assignment (different signedness)
 arch/x86/kernel/ftrace.c:165:14:    expected long *static [toplevel] ftrace_nop
 arch/x86/kernel/ftrace.c:165:14:    got unsigned long *<noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 082d99642622..66d900248fc2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -22,7 +22,7 @@
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
 
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
-- 
cgit v1.2.3-55-g7522


From ac2b86fdef5b44f194eefaa6b7b6aea9423d1bc2 Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Wed, 24 Sep 2008 16:31:56 +0100
Subject: x86/ftrace: use uaccess in atomic context

With latest -tip I get this bug:

[   49.439988] in_atomic():0, irqs_disabled():1
[   49.440118] INFO: lockdep is turned off.
[   49.440118] Pid: 2814, comm: modprobe Tainted: G        W 2.6.27-rc7 #4
[   49.440118]  [<c01215e1>] __might_sleep+0xe1/0x120
[   49.440118]  [<c01148ea>] ftrace_modify_code+0x2a/0xd0
[   49.440118]  [<c01148a2>] ? ftrace_test_p6nop+0x0/0xa
[   49.440118]  [<c016e80e>] __ftrace_update_code+0xfe/0x2f0
[   49.440118]  [<c01148a2>] ? ftrace_test_p6nop+0x0/0xa
[   49.440118]  [<c016f190>] ftrace_convert_nops+0x50/0x80
[   49.440118]  [<c016f1d6>] ftrace_init_module+0x16/0x20
[   49.440118]  [<c015498b>] load_module+0x185b/0x1d30
[   49.440118]  [<c01767a0>] ? find_get_page+0x0/0xf0
[   49.440118]  [<c02463c0>] ? sprintf+0x0/0x30
[   49.440118]  [<c034e012>] ? mutex_lock_interruptible_nested+0x1f2/0x350
[   49.440118]  [<c0154eb3>] sys_init_module+0x53/0x1b0
[   49.440118]  [<c0352340>] ? do_page_fault+0x0/0x740
[   49.440118]  [<c0104012>] syscall_call+0x7/0xb
[   49.440118]  =======================

It is because ftrace_modify_code() calls copy_to_user and
copy_from_user.
These functions have been inserted after guessing that there
couldn't be any race condition but copy_[to/from]_user might
sleep and __ftrace_update_code is called with local_irq_saved.

These function have been inserted since this commit:
d5e92e8978fd2574e415dc2792c5eb592978243d:
"ftrace: x86 use copy from user function"

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 66d900248fc2..222507e8157b 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -71,13 +71,13 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
-	if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+	if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
 		return 1;
 
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 		return 2;
 
-	WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code,
+	WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
 				    MCOUNT_INSN_SIZE));
 
 	sync_core();
-- 
cgit v1.2.3-55-g7522


From d74185ed27651ad8d920b37d7851306ad01f7d6f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Mon, 29 Sep 2008 16:00:05 +0800
Subject: markers: fix unregister bug and reenter bug

unregister bug:

codes using makers are typically calling marker_probe_unregister()
and then destroying the data that marker_probe_func needs(or
unloading this module). This is bug when the corresponding
marker_probe_func is still running(on other cpus),
it is using the destroying/ed data.

we should call synchronize_sched() after marker_update_probes().

reenter bug:

marker_probe_register(), marker_probe_unregister() and
marker_probe_unregister_private_data() are not reentrant safe
functions. these 3 functions release markers_mutex and then
require it again and do "entry->oldptr = old; ...", but entry->oldptr
maybe is using now for these 3 functions may reenter when markers_mutex
is released.

we use synchronize_sched() instead of call_rcu_sched() to fix
this bug. actually we can do:
"
if (entry->rcu_pending)
		rcu_barrier_sched();
"
after require markers_mutex again. but synchronize_sched()
is better and simpler. For these 3 functions are not critical path.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 52 ++++++----------------------------------------------
 1 file changed, 6 insertions(+), 46 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 7d1faecd7a51..9f76c4a622ec 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -60,9 +60,6 @@ struct marker_entry {
 	struct marker_probe_closure single;
 	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
-	struct rcu_head rcu;
-	void *oldptr;
-	unsigned char rcu_pending:1;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
@@ -199,16 +196,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
-static void free_old_closure(struct rcu_head *head)
-{
-	struct marker_entry *entry = container_of(head,
-		struct marker_entry, rcu);
-	kfree(entry->oldptr);
-	/* Make sure we free the data before setting the pending flag to 0 */
-	smp_wmb();
-	entry->rcu_pending = 0;
-}
-
 static void debug_print_probes(struct marker_entry *entry)
 {
 	int i;
@@ -417,7 +404,6 @@ static struct marker_entry *add_marker(const char *name, const char *format)
 	e->multi = NULL;
 	e->ptype = 0;
 	e->refcount = 0;
-	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
 	return e;
 }
@@ -447,9 +433,6 @@ static int remove_marker(const char *name)
 	if (e->single.func != __mark_empty_function)
 		return -EBUSY;
 	hlist_del(&e->hlist);
-	/* Make sure the call_rcu has been executed */
-	if (e->rcu_pending)
-		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -479,12 +462,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	e->multi = (*entry)->multi;
 	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
-	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
-	/* Make sure the call_rcu has been executed */
-	if ((*entry)->rcu_pending)
-		rcu_barrier_sched();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -658,12 +637,6 @@ int marker_probe_register(const char *name, const char *format,
 			goto end;
 		}
 	}
-	/*
-	 * If we detect that a call_rcu is pending for this marker,
-	 * make sure it's executed now.
-	 */
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_add_probe(entry, probe, probe_private);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -671,14 +644,11 @@ int marker_probe_register(const char *name, const char *format,
 	}
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	WARN_ON(!entry);
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -708,20 +678,15 @@ int marker_probe_unregister(const char *name,
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
 	ret = 0;
 end:
@@ -787,19 +752,14 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 		ret = -ENOENT;
 		goto end;
 	}
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
+	synchronize_sched();
+	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker_from_private_data(probe, probe_private);
 	WARN_ON(!entry);
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-- 
cgit v1.2.3-55-g7522


From ca2db6cf30d6a45798b7a760d0c4f7735b16799d Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Tue, 30 Sep 2008 01:49:39 -0400
Subject: tracepoints: use rcu sched

Make tracepoints use rcu sched. (cleanup)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index c7c62a4a75f5..db39961a0d03 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -80,10 +80,7 @@ static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
 	entry->rcu_pending = 1;
 	/* write rcu_pending before calling the RCU callback */
 	smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-	synchronize_sched();	/* Until we have the call_rcu_sched() */
-#endif
-	call_rcu(&entry->rcu, free_old_closure);
+	call_rcu_sched(&entry->rcu, free_old_closure);
 }
 
 static void debug_print_probes(struct tracepoint_entry *entry)
@@ -245,9 +242,9 @@ static int remove_tracepoint(const char *name)
 	if (e->refcount)
 		return -EBUSY;
 	hlist_del(&e->hlist);
-	/* Make sure the call_rcu has been executed */
+	/* Make sure the call_rcu_sched has been executed */
 	if (e->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -344,11 +341,11 @@ int tracepoint_probe_register(const char *name, void *probe)
 		}
 	}
 	/*
-	 * If we detect that a call_rcu is pending for this tracepoint,
+	 * If we detect that a call_rcu_sched is pending for this tracepoint,
 	 * make sure it's executed now.
 	 */
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = tracepoint_entry_add_probe(entry, probe);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -387,7 +384,7 @@ int tracepoint_probe_unregister(const char *name, void *probe)
 	if (!entry)
 		goto end;
 	if (entry->rcu_pending)
-		rcu_barrier();
+		rcu_barrier_sched();
 	old = tracepoint_entry_remove_probe(entry, probe);
 	mutex_unlock(&tracepoints_mutex);
 	tracepoint_update_probes();		/* may update entry */
-- 
cgit v1.2.3-55-g7522


From 9a1e9693f534174945154197fec4ec92f168ce21 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Tue, 30 Sep 2008 01:51:12 -0400
Subject: tracepoints: fix reentrancy

The tracepoints had the same problem markers did have wrt reentrancy. Apply a
similar fix using a rcu_barrier after each tracepoint mutex lock.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/tracepoint.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index db39961a0d03..f2b7c28a4708 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -356,6 +356,8 @@ int tracepoint_probe_register(const char *name, void *probe)
 	mutex_lock(&tracepoints_mutex);
 	entry = get_tracepoint(name);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	tracepoint_entry_free_old(entry, old);
 end:
 	mutex_unlock(&tracepoints_mutex);
@@ -392,6 +394,8 @@ int tracepoint_probe_unregister(const char *name, void *probe)
 	entry = get_tracepoint(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	tracepoint_entry_free_old(entry, old);
 	remove_tracepoint(name);	/* Ignore busy error message */
 	ret = 0;
-- 
cgit v1.2.3-55-g7522


From e98d0eabef2748d88fa58760d104e8e68517406b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Sep 2008 11:05:13 -0400
Subject: markers: marker_synchronize_unregister()

Create marker_synchronize_unregister() which must be called before the end of
exit() to make sure every probe callers have exited the non preemptible section
and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 1290653f9241..889196c7fbb1 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe,
 extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
 	int num);
 
+/*
+ * marker_synchronize_unregister must be called between the last marker probe
+ * unregistration and the end of module exit to make sure there is no caller
+ * executing a probe when it is freed.
+ */
+#define marker_synchronize_unregister() synchronize_sched()
+
 #endif
-- 
cgit v1.2.3-55-g7522


From e2d3b75dbc486253c910722486ac64087f96c59f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Sep 2008 11:08:03 -0400
Subject: markers: fix unregister bug and reenter bug, cleanup

Use the new rcu_read_lock_sched/unlock_sched() in marker code around the call
site instead of preempt_disable/enable(). It helps reviewing the code more
easily.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 9f76c4a622ec..fe5ca72f9659 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -100,11 +100,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 	char ptype;
 
 	/*
-	 * preempt_disable does two things : disabling preemption to make sure
-	 * the teardown of the callbacks can be done correctly when they are in
-	 * modules and they insure RCU read coherency.
+	 * rcu_read_lock_sched does two things : disabling preemption to make
+	 * sure the teardown of the callbacks can be done correctly when they
+	 * are in modules and they insure RCU read coherency.
 	 */
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -142,7 +142,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 			va_end(args);
 		}
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -159,7 +159,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 	va_list args;	/* not initialized */
 	char ptype;
 
-	preempt_disable();
+	rcu_read_lock_sched();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -192,7 +192,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 			multi[i].func(multi[i].probe_private, call_private,
 				mdata->format, &args);
 	}
-	preempt_enable();
+	rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
@@ -539,7 +539,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  * Disable a marker and its probe callback.
  * Note: only waiting an RCU period after setting elem->call to the empty
  * function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
  */
 static void disable_marker(struct marker *elem)
 {
-- 
cgit v1.2.3-55-g7522


From 531d297569014e8f889b167a2d15d72429faead1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Sep 2008 11:09:15 -0400
Subject: markers: probe example, fix teardown

Need a marker_synchronize_unregister() before the end of exit() to make sure
every probe callers have exited the non preemptible section and thus are not
executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 samples/markers/probe-example.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index c8e099d4d1fd..2dfb3b32937e 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -81,6 +81,7 @@ static void __exit probe_fini(void)
 			probe_array[i].probe_func, &probe_array[i]);
 	printk(KERN_INFO "Number of event b : %u\n",
 			atomic_read(&eventb_count));
+	marker_synchronize_unregister();
 }
 
 module_init(probe_init);
-- 
cgit v1.2.3-55-g7522


From 91a8d46c47e7eb1c53c181e4328a3cfa45ae4ad3 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Sep 2008 11:10:34 -0400
Subject: markers: documentation fix for teardown

Document the need for a marker_synchronize_unregister() before the end of
exit() to make sure every probe callers have exited the non preemptible
section and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/markers.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0c..089f6138fcd9 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
-- 
cgit v1.2.3-55-g7522


From 5b9261d93e5fa3db4995d5b77b5ed365166e001c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Sep 2008 11:11:47 -0400
Subject: sputrace: use marker_synchronize_unregister()

We need a marker_synchronize_unregister() before the end of exit() to make sure
every probe callers have exited the non preemptible section and thus are not
executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/platforms/cell/spufs/sputrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 92d20e993ede..2ece399f2862 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
 
 	remove_proc_entry("sputrace", NULL);
 	kfree(sputrace_log);
+	marker_synchronize_unregister();
 }
 
 module_init(sputrace_init);
-- 
cgit v1.2.3-55-g7522


From ed86a59071a4ccd0e9bcefc61e013759a21eda78 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Wed, 1 Oct 2008 12:03:25 -0400
Subject: markers: re-enable fast batch registration

Lai Jiangshan discovered a reentrancy issue with markers and fixed it by
adding synchronize_sched() calls at each registration/unregistraiton.

It works, but it removes the ability to do batch
registration/unregistration and can cause registration of ~100 markers
to take about 30 seconds on a loaded machine (synchronize_sched() is
much slower on such workloads).

This patch implements a version of the fix which won't slow down marker batch
registration/unregistration. It also go back to the original non-synchronized
reg/unreg.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index fe5ca72f9659..05a25776f71f 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -60,6 +60,9 @@ struct marker_entry {
 	struct marker_probe_closure single;
 	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
@@ -196,6 +199,16 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
+static void free_old_closure(struct rcu_head *head)
+{
+	struct marker_entry *entry = container_of(head,
+		struct marker_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
 static void debug_print_probes(struct marker_entry *entry)
 {
 	int i;
@@ -404,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
 	e->multi = NULL;
 	e->ptype = 0;
 	e->refcount = 0;
+	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
 	return e;
 }
@@ -433,6 +447,9 @@ static int remove_marker(const char *name)
 	if (e->single.func != __mark_empty_function)
 		return -EBUSY;
 	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier_sched();
 	kfree(e);
 	return 0;
 }
@@ -462,8 +479,12 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	e->multi = (*entry)->multi;
 	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
+	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
+	/* Make sure the call_rcu has been executed */
+	if ((*entry)->rcu_pending)
+		rcu_barrier_sched();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -637,6 +658,12 @@ int marker_probe_register(const char *name, const char *format,
 			goto end;
 		}
 	}
+	/*
+	 * If we detect that a call_rcu is pending for this marker,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_add_probe(entry, probe, probe_private);
 	if (IS_ERR(old)) {
 		ret = PTR_ERR(old);
@@ -644,11 +671,16 @@ int marker_probe_register(const char *name, const char *format,
 	}
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -678,15 +710,22 @@ int marker_probe_unregister(const char *name,
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry)
 		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(name);	/* Ignore busy error message */
 	ret = 0;
 end:
@@ -752,14 +791,21 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
 		ret = -ENOENT;
 		goto end;
 	}
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
 	marker_update_probes();		/* may update entry */
-	synchronize_sched();
-	kfree(old);
 	mutex_lock(&markers_mutex);
 	entry = get_marker_from_private_data(probe, probe_private);
 	WARN_ON(!entry);
+	if (entry->rcu_pending)
+		rcu_barrier_sched();
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu_sched(&entry->rcu, free_old_closure);
 	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-- 
cgit v1.2.3-55-g7522


From 53c8c8fdfd2d2d515bdcb3d0f2a11d1f3f42ece1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 3 Oct 2008 11:52:54 -0400
Subject: markers: turn marker_synchronize_unregister() into an inline

Turn marker synchronize unregister into a static inline. There is no
reason to keep it as a macro over a static inline.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..38e32e781ed7 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -13,6 +13,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/rcupdate.h>
 
 struct module;
 struct marker;
@@ -165,6 +166,9 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
  * unregistration and the end of module exit to make sure there is no caller
  * executing a probe when it is freed.
  */
-#define marker_synchronize_unregister() synchronize_sched()
+static inline void marker_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 48043bcdf8f398d28e75ffed6ee85208d751f87f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 8 Oct 2008 10:23:36 +0800
Subject: markers: fix unchecked format

when the second, third... probe is registered, its format is
not checked, this patch fixes it.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 05a25776f71f..3b75d0e8b5a4 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
 	entry = get_marker(name);
 	if (!entry) {
 		entry = add_marker(name, format);
-		if (IS_ERR(entry)) {
+		if (IS_ERR(entry))
 			ret = PTR_ERR(entry);
-			goto end;
-		}
+	} else if (format) {
+		if (!entry->format)
+			ret = marker_set_format(&entry, format);
+		else if (strcmp(entry->format, format))
+			ret = -EPERM;
 	}
+	if (ret)
+		goto end;
+
 	/*
 	 * If we detect that a call_rcu is pending for this marker,
 	 * make sure it's executed now.
-- 
cgit v1.2.3-55-g7522


From 1b7ae37c030a9fbbb5ebbf5d7bbfd7208cf805b7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Fri, 10 Oct 2008 14:43:57 +0800
Subject: markers: bit-field is not thread-safe nor smp-safe

bit-field is not thread-safe nor smp-safe.

struct marker_entry.rcu_pending is not protected by any lock
in rcu-callback free_old_closure().
so we must turn it into a safe type.

detail:

I suppose rcu_pending and ptype are store in struct marker_entry.tmp1

free_old_closure() side:           change ptype side:

                                |  load struct marker_entry.tmp1
--------------------------------|--------------------------------
                                |  change ptype bit in tmp1
load struct marker_entry.tmp1   |
change rcu_pending bit in tmp1  |
store tmp1                      |
--------------------------------|--------------------------------
                                |  store tmp1

now this result equals that free_old_closure() do not change rcu_pending
bit, bug! This bug will cause redundant rcu_barrier_sched() called.
not too harmful.

----- corresponding:

free_old_closure() side:           change ptype side:

load struct marker_entry.tmp1   |
--------------------------------|--------------------------------
                                |  load struct marker_entry.tmp1
change rcu_pending bit in tmp1  |
                                |  change ptype bit in tmp1
                                |  store tmp1
--------------------------------|--------------------------------
store tmp1                      |

now this result equals that change ptype side do not change ptype
bit, bug! this bug cause marker_probe_cb() access to invalid memory.
oops!

see also: http://en.wikipedia.org/wiki/Bit_field

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/marker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/marker.c b/kernel/marker.c
index 3b75d0e8b5a4..e9c6b2bc9400 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -62,7 +62,7 @@ struct marker_entry {
 	int refcount;	/* Number of times armed. 0 if disarmed. */
 	struct rcu_head rcu;
 	void *oldptr;
-	unsigned char rcu_pending:1;
+	int rcu_pending;
 	unsigned char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
-- 
cgit v1.2.3-55-g7522


From aa5d9151f745b6ee6a236a1f109118034277eb92 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Sat, 13 Sep 2008 09:36:06 -0700
Subject: tracing/fastboot: add a script to visualize the kernel boot process /
 time

When optimizing the kernel boot time, it's very valuable to visualize
what is going on at which time. In addition, with the fastboot asynchronous
initcall level, it's very valuable to see which initcall gets run where
and when.

This patch adds a script to turn a dmesg into a SVG graph (that can be
shown with tools such as InkScape, Gimp or Firefox) and a small change
to the initcall code to print the PID of the thread calling the initcall
(so that the script can work out the parallelism).

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 init/main.c          |   3 +-
 scripts/bootgraph.pl | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 scripts/bootgraph.pl

diff --git a/init/main.c b/init/main.c
index ded1fae965ab..16abba05c826 100644
--- a/init/main.c
+++ b/init/main.c
@@ -711,7 +711,8 @@ int do_one_initcall(initcall_t fn)
 	int result;
 
 	if (initcall_debug) {
-		printk("calling  %pF\n", fn);
+		printk("calling  %pF", fn);
+		printk(" @ %i\n",  task_pid_nr(current));
 		t0 = ktime_get();
 	}
 
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
new file mode 100644
index 000000000000..d459b8bdef02
--- /dev/null
+++ b/scripts/bootgraph.pl
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+# Copyright 2008, Intel Corporation
+#
+# This file is part of the Linux kernel
+#
+# This program file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program in a file named COPYING; if not, write to the
+# Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301 USA
+#
+# Authors:
+# 	Arjan van de Ven <arjan@linux.intel.com>
+
+
+#
+# This script turns a dmesg output into a SVG graphic that shows which
+# functions take how much time. You can view SVG graphics with various
+# programs, including Inkscape, The Gimp and Firefox.
+#
+#
+# For this script to work, the kernel needs to be compiled with the
+# CONFIG_PRINTK_TIME configuration option enabled, and with
+# "initcall_debug" passed on the kernel command line.
+#
+# usage:
+# 	dmesg | perl scripts/bootgraph.pl > output.svg
+#
+
+my @rows;
+my %start, %end, %row;
+my $done = 0;
+my $rowcount = 0;
+my $maxtime = 0;
+my $count = 0;
+while (<>) {
+	my $line = $_;
+	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
+		my $func = $2;
+		if ($done == 0) {
+			$start{$func} = $1;
+		}
+		$row{$func} = 1;
+		if ($line =~ /\@ ([0-9]+)/) {
+			my $pid = $1;
+			if (!defined($rows[$pid])) {
+				$rowcount = $rowcount + 1;
+				$rows[$pid] = $rowcount;
+			}
+			$row{$func} = $rows[$pid];
+		}
+		$count = $count + 1;
+	}
+
+	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z\_]+)\+.*returned/) {
+		if ($done == 0) {
+			$end{$2} = $1;
+			$maxtime = $1;
+		}
+	}
+	if ($line =~ /Write protecting the/) {
+		$done = 1;
+	}
+}
+
+if ($count == 0) {
+	print "No data found in the dmesg. Make sure CONFIG_PRINTK_TIME is enabled and\n";
+	print "that initcall_debug is passed on the kernel command line.\n\n";
+	print "Usage: \n";
+	print "      dmesg | perl scripts/bootgraph.pl > output.svg\n\n";
+	exit;
+}
+
+print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
+print "<svg width=\"1000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
+
+my @styles;
+
+$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[8] = "fill:rgb(255,0,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+
+my $mult = 950.0 / $maxtime;
+my $threshold = 0.0500 / $maxtime;
+my $stylecounter = 0;
+while (($key,$value) = each %start) {
+	my $duration = $end{$key} - $start{$key};
+
+	if ($duration >= $threshold) {
+		my $s, $s2, $e, $y;
+		$s = $value * $mult;
+		$s2 = $s + 6;
+		$e = $end{$key} * $mult;
+		$w = $e - $s;
+
+		$y = $row{$key} * 150;
+		$y2 = $y + 4;
+
+		$style = $styles[$stylecounter];
+		$stylecounter = $stylecounter + 1;
+		if ($stylecounter > 11) {
+			$stylecounter = 0;
+		};
+
+		print "<rect x=\"$s\" width=\"$w\" y=\"$y\" height=\"145\" style=\"$style\"/>\n";
+		print "<text transform=\"translate($s2,$y2) rotate(90)\">$key</text>\n";
+	}
+}
+
+
+# print the time line on top
+my $time = 0.0;
+while ($time < $maxtime) {
+	my $s2 = $time * $mult;
+	print "<text transform=\"translate($s2,89) rotate(90)\">$time</text>\n";
+	$time = $time + 0.1;
+}
+
+print "</svg>\n";
-- 
cgit v1.2.3-55-g7522


From d13744cd6e3fef373a3fe656ac349b4e7c49ff79 Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Tue, 23 Sep 2008 11:32:08 +0100
Subject: tracing/ftrace: add the boot tracer

Add the boot/initcall tracer.

It's primary purpose is to be able to trace the initcalls.

It is intended to be used with scripts/bootgraph.pl after some small
improvements.

Note that it is not active after its init. To avoid tracing (and so
crashing) before the whole tracing engine init, you have to explicitly
call start_boot_trace() after do_pre_smp_initcalls() to enable it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  19 +++++++++
 kernel/trace/trace.h      |   4 ++
 kernel/trace/trace_boot.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 kernel/trace/trace_boot.c

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5de9903645d5..91954eb6460f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -5,6 +5,8 @@
 
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/types.h>
 
 extern int ftrace_enabled;
 extern int
@@ -209,4 +211,21 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 
+struct boot_trace {
+	pid_t			caller;
+	initcall_t		func;
+	int			result;
+	unsigned long long	duration;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+extern void trace_boot(struct boot_trace *it);
+extern void start_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it) { }
+static inline void start_boot_trace(void) { }
+#endif
+
+
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb2c3fb7dd54..b28bf8812efc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/clocksource.h>
 #include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -19,6 +20,7 @@ enum trace_type {
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
+	TRACE_BOOT,
 
 	__TRACE_LAST_TYPE
 };
@@ -30,6 +32,7 @@ struct ftrace_entry {
 	unsigned long		ip;
 	unsigned long		parent_ip;
 };
+extern struct tracer boot_tracer;
 
 /*
  * Context switch trace entry - which task (and prio) we switched from/to:
@@ -108,6 +111,7 @@ struct trace_field {
 		struct print_entry		print;
 		struct mmiotrace_rw		mmiorw;
 		struct mmiotrace_map		mmiomap;
+		struct boot_trace		initcall;
 	};
 };
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..c65ef8ffd6b4
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,101 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static int trace_boot_enabled;
+
+
+/* Should be started after do_pre_smp_initcalls() in init/main.c */
+void start_boot_trace(void)
+{
+	trace_boot_enabled = 1;
+}
+
+void stop_boot_trace(struct trace_array *tr)
+{
+	trace_boot_enabled = 0;
+}
+
+static void boot_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	boot_trace = tr;
+
+	trace_boot_enabled = 0;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr->data[cpu]);
+}
+
+static void boot_trace_ctrl_update(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		start_boot_trace();
+	else
+		stop_boot_trace(tr);
+}
+
+static int initcall_print_line(struct trace_iterator *iter)
+{
+	int ret = 1;
+	struct trace_entry *entry = iter->ent;
+	struct boot_trace *it = &entry->field.initcall;
+	struct trace_seq *s = &iter->seq;
+
+	if (iter->ent->type == TRACE_BOOT)
+		ret = trace_seq_printf(s, "%pF called from %i "
+				       "returned %d after %lld msecs\n",
+				       it->func, it->caller, it->result,
+				       it->duration);
+	if (ret)
+		return 1;
+	return 0;
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+	.name		= "initcall",
+	.init		= boot_trace_init,
+	.reset		= stop_boot_trace,
+	.ctrl_update	= boot_trace_ctrl_update,
+	.print_line	= initcall_print_line,
+};
+
+
+void trace_boot(struct boot_trace *it)
+{
+	struct trace_entry *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = boot_trace;
+
+	if (!trace_boot_enabled)
+		return;
+
+	preempt_disable();
+	data = tr->data[smp_processor_id()];
+
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
+	entry = tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, 0);
+	entry->type = TRACE_BOOT;
+	entry->field.initcall = *it;
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
+	trace_wake_up();
+
+	preempt_enable();
+}
-- 
cgit v1.2.3-55-g7522


From b5ad384e79add1d87fff54070000dadcf218ffab Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Tue, 23 Sep 2008 11:34:32 +0100
Subject: tracing/ftrace: make tracing suitable to run the boot tracer

The tracing engine have now to be init in early_initcall to set the
boot tracer. Only the debugfs settings will be initialized at
fs_initcall time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f2ef72fa3f17..6ada059832a6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2990,7 +2990,7 @@ struct dentry *tracing_init_dentry(void)
 #include "trace_selftest.c"
 #endif
 
-static __init void tracer_init_debugfs(void)
+static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
 	struct dentry *entry;
@@ -3078,6 +3078,7 @@ static __init void tracer_init_debugfs(void)
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
+	return 0;
 }
 
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
@@ -3504,17 +3505,20 @@ __init static int tracer_alloc_buffers(void)
 		pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
 	pr_info("   actual entries %ld\n", global_trace.entries);
 
-	tracer_init_debugfs();
-
 	trace_init_cmdlines();
 
 	register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+	register_tracer(&boot_tracer);
+	current_trace = &boot_tracer;
+	current_trace->init(&global_trace);
+#else
 	current_trace = &nop_trace;
+#endif
 
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
-
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &trace_panic_notifier);
 
@@ -3548,4 +3552,5 @@ __init static int tracer_alloc_buffers(void)
 	}
 	return ret;
 }
-fs_initcall(tracer_alloc_buffers);
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
-- 
cgit v1.2.3-55-g7522


From 1f5c2abbdeb2bb07b20c6a66bfecefe6c867b1ee Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Tue, 23 Sep 2008 11:36:20 +0100
Subject: tracing/ftrace: give an entry on the config for boot tracer

Bring the entry to choose the boot tracer on the kernel config.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig  | 12 ++++++++++++
 kernel/trace/Makefile |  1 +
 2 files changed, 13 insertions(+)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 254328dec672..81a17ef6b942 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -116,6 +116,18 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls. Its aim is to be parsed by the
+	  /scripts/bootgraph.pl tool to produce pretty graphics about
+	  boot inefficiencies, giving a visual representation of the
+	  delays during initcalls.
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 73ba13f5a461..35a07f7cfa86 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -22,5 +22,6 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
-- 
cgit v1.2.3-55-g7522


From 3bf77af6e1fef1124bf71d81f9f84885f0ee0dea Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Tue, 23 Sep 2008 11:38:18 +0100
Subject: tracing/ftrace: launch boot tracing after pre-smp initcalls

Launch the boot tracing inside the initcall_debug area. Old printk
have not been removed to keep the old way of initcall tracing for
backward compatibility.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/main.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/init/main.c b/init/main.c
index 16abba05c826..1e39a1eab190 100644
--- a/init/main.c
+++ b/init/main.c
@@ -709,10 +709,12 @@ int do_one_initcall(initcall_t fn)
 	ktime_t t0, t1, delta;
 	char msgbuf[64];
 	int result;
+	struct boot_trace it;
 
 	if (initcall_debug) {
-		printk("calling  %pF", fn);
-		printk(" @ %i\n",  task_pid_nr(current));
+		it.caller = task_pid_nr(current);
+		it.func = fn;
+		printk("calling  %pF @ %i\n", fn, it.caller);
 		t0 = ktime_get();
 	}
 
@@ -721,10 +723,11 @@ int do_one_initcall(initcall_t fn)
 	if (initcall_debug) {
 		t1 = ktime_get();
 		delta = ktime_sub(t1, t0);
-
-		printk("initcall %pF returned %d after %Ld msecs\n",
-			fn, result,
-			(unsigned long long) delta.tv64 >> 20);
+		it.result = result;
+		it.duration = (unsigned long long) delta.tv64 >> 20;
+		printk("initcall %pF returned %d after %Ld msecs\n", fn,
+			result, it.duration);
+		trace_boot(&it);
 	}
 
 	msgbuf[0] = 0;
@@ -859,6 +862,7 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	start_boot_trace();
 
 	smp_init();
 	sched_init_smp();
-- 
cgit v1.2.3-55-g7522


From 3ce2b9200da8b7170cc7463b7ee4212fad7b291e Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Wed, 24 Sep 2008 10:36:09 +0100
Subject: ftrace/fastboot: disable tracers self-tests when boot tracer is
 selected

The tracing engine resets the ring buffer and the tracers touch it
too during self-tests. These self-tests happen during tracers registering
and work against boot tracing which is logging initcalls.

We have to disable tracing self-tests if the boot-tracer is selected.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 81a17ef6b942..4feb3c81f94d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,9 @@ config BOOT_TRACER
 	  the timings of the initcalls. Its aim is to be parsed by the
 	  /scripts/bootgraph.pl tool to produce pretty graphics about
 	  boot inefficiencies, giving a visual representation of the
-	  delays during initcalls.
+	  delays during initcalls. Note that tracers self tests can't
+	  be enabled if this tracer is selected since only one tracer
+	  should touch the tracing buffer at a time.
 
 config STACK_TRACER
 	bool "Trace max stack"
@@ -168,8 +170,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
-	depends on DEBUG_KERNEL
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
-- 
cgit v1.2.3-55-g7522


From 7c572ac0cf5e8cd8e17f084bc6c253296cc42279 Mon Sep 17 00:00:00 2001
From: Frédéric Weisbecker
Date: Thu, 25 Sep 2008 13:25:30 +0100
Subject: tracing/ftrace: don't consume unhandled entries by boot tracer

When the boot tracer can't handle an entry output, it returns 1.
It should return 0 to relay on other output functions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index c65ef8ffd6b4..d5c9e2e4a9c4 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -47,7 +47,7 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 
 static int initcall_print_line(struct trace_iterator *iter)
 {
-	int ret = 1;
+	int ret = 0;
 	struct trace_entry *entry = iter->ent;
 	struct boot_trace *it = &entry->field.initcall;
 	struct trace_seq *s = &iter->seq;
-- 
cgit v1.2.3-55-g7522


From 5aa60c6073456812251caf9177cb921b2de68f77 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:37 -0400
Subject: ftrace: give time for wakeup test to run

It is possible that the testing thread in the ftrace wakeup test does not
run before we stop the trace. This will cause the trace to fail since nothing
will be in the buffers.

This patch adds a small wait in the wakeup test to allow for the woken task
to run and be traced.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_selftest.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 82db9103b9bc..5ebd4b135498 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -498,6 +498,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 
 	wake_up_process(p);
 
+	/* give a little time to let the thread wake up */
+	msleep(100);
+
 	/* stop the tracing. */
 	tr->ctrl = 0;
 	trace->ctrl_update(tr);
-- 
cgit v1.2.3-55-g7522


From 7a8e76a3829f1067b70f715771ff88baf2fbf3c3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:38 -0400
Subject: tracing: unified trace buffer

This is a unified tracing buffer that implements a ring buffer that
hopefully everyone will eventually be able to use.

The events recorded into the buffer have the following structure:

  struct ring_buffer_event {
	u32 type:2, len:3, time_delta:27;
	u32 array[];
  };

The minimum size of an event is 8 bytes. All events are 4 byte
aligned inside the buffer.

There are 4 types (all internal use for the ring buffer, only
the data type is exported to the interface users).

 RINGBUF_TYPE_PADDING: this type is used to note extra space at the end
	of a buffer page.

 RINGBUF_TYPE_TIME_EXTENT: This type is used when the time between events
	is greater than the 27 bit delta can hold. We add another
	32 bits, and record that in its own event (8 byte size).

 RINGBUF_TYPE_TIME_STAMP: (Not implemented yet). This will hold data to
	help keep the buffer timestamps in sync.

RINGBUF_TYPE_DATA: The event actually holds user data.

The "len" field is only three bits. Since the data must be
4 byte aligned, this field is shifted left by 2, giving a
max length of 28 bytes. If the data load is greater than 28
bytes, the first array field holds the full length of the
data load and the len field is set to zero.

Example, data size of 7 bytes:

	type = RINGBUF_TYPE_DATA
	len = 2
	time_delta: <time-stamp> - <prev_event-time-stamp>
	array[0..1]: <7 bytes of data> <1 byte empty>

This event is saved in 12 bytes of the buffer.

An event with 82 bytes of data:

	type = RINGBUF_TYPE_DATA
	len = 0
	time_delta: <time-stamp> - <prev_event-time-stamp>
	array[0]: 84 (Note the alignment)
	array[1..14]: <82 bytes of data> <2 bytes empty>

The above event is saved in 92 bytes (if my math is correct).
82 bytes of data, 2 bytes empty, 4 byte header, 4 byte length.

Do not reference the above event struct directly. Use the following
functions to gain access to the event table, since the
ring_buffer_event structure may change in the future.

ring_buffer_event_length(event): get the length of the event.
	This is the size of the memory used to record this
	event, and not the size of the data pay load.

ring_buffer_time_delta(event): get the time delta of the event
	This returns the delta time stamp since the last event.
	Note: Even though this is in the header, there should
		be no reason to access this directly, accept
		for debugging.

ring_buffer_event_data(event): get the data from the event
	This is the function to use to get the actual data
	from the event. Note, it is only a pointer to the
	data inside the buffer. This data must be copied to
	another location otherwise you risk it being written
	over in the buffer.

ring_buffer_lock: A way to lock the entire buffer.
ring_buffer_unlock: unlock the buffer.

ring_buffer_alloc: create a new ring buffer. Can choose between
	overwrite or consumer/producer mode. Overwrite will
	overwrite old data, where as consumer producer will
	throw away new data if the consumer catches up with the
	producer.  The consumer/producer is the default.

ring_buffer_free: free the ring buffer.

ring_buffer_resize: resize the buffer. Changes the size of each cpu
	buffer. Note, it is up to the caller to provide that
	the buffer is not being used while this is happening.
	This requirement may go away but do not count on it.

ring_buffer_lock_reserve: locks the ring buffer and allocates an
	entry on the buffer to write to.
ring_buffer_unlock_commit: unlocks the ring buffer and commits it to
	the buffer.

ring_buffer_write: writes some data into the ring buffer.

ring_buffer_peek: Look at a next item in the cpu buffer.
ring_buffer_consume: get the next item in the cpu buffer and
	consume it. That is, this function increments the head
	pointer.

ring_buffer_read_start: Start an iterator of a cpu buffer.
	For now, this disables the cpu buffer, until you issue
	a finish. This is just because we do not want the iterator
	to be overwritten. This restriction may change in the future.
	But note, this is used for static reading of a buffer which
	is usually done "after" a trace. Live readings would want
	to use the ring_buffer_consume above, which will not
	disable the ring buffer.

ring_buffer_read_finish: Finishes the read iterator and reenables
	the ring buffer.

ring_buffer_iter_peek: Look at the next item in the cpu iterator.
ring_buffer_read: Read the iterator and increment it.
ring_buffer_iter_reset: Reset the iterator to point to the beginning
	of the cpu buffer.
ring_buffer_iter_empty: Returns true if the iterator is at the end
	of the cpu buffer.

ring_buffer_size: returns the size in bytes of each cpu buffer.
	Note, the real size is this times the number of CPUs.

ring_buffer_reset_cpu: Sets the cpu buffer to empty
ring_buffer_reset: sets all cpu buffers to empty

ring_buffer_swap_cpu: swaps a cpu buffer from one buffer with a
	cpu buffer of another buffer. This is handy when you
	want to take a snap shot of a running trace on just one
	cpu. Having a backup buffer, to swap with facilitates this.
	Ftrace max latencies use this.

ring_buffer_empty: Returns true if the ring buffer is empty.
ring_buffer_empty_cpu: Returns true if the cpu buffer is empty.

ring_buffer_record_disable: disable all cpu buffers (read only)
ring_buffer_record_disable_cpu: disable a single cpu buffer (read only)
ring_buffer_record_enable: enable all cpu buffers.
ring_buffer_record_enabl_cpu: enable a single cpu buffer.

ring_buffer_entries: The number of entries in a ring buffer.
ring_buffer_overruns: The number of entries removed due to writing wrap.

ring_buffer_time_stamp: Get the time stamp used by the ring buffer
ring_buffer_normalize_time_stamp: normalize the ring buffer time stamp
	into nanosecs.

I still need to implement the GTOD feature. But we need support from
the cpu frequency infrastructure.  But this can be done at a later
time without affecting the ring buffer interface.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |  130 ++++
 kernel/trace/Kconfig        |    4 +
 kernel/trace/Makefile       |    1 +
 kernel/trace/ring_buffer.c  | 1672 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1807 insertions(+)
 create mode 100644 include/linux/ring_buffer.h
 create mode 100644 kernel/trace/ring_buffer.c

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..c52375b8330d
--- /dev/null
+++ b/include/linux/ring_buffer.h
@@ -0,0 +1,130 @@
+#ifndef _LINUX_RING_BUFFER_H
+#define _LINUX_RING_BUFFER_H
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+
+struct ring_buffer;
+struct ring_buffer_iter;
+
+/*
+ * Don't reference this struct directly, use functions below.
+ */
+struct ring_buffer_event {
+	u32		type:2, len:3, time_delta:27;
+	u32		array[];
+};
+
+/**
+ * enum ring_buffer_type - internal ring buffer types
+ *
+ * @RINGBUF_TYPE_PADDING:	Left over page padding
+ *				 array is ignored
+ *				 size is variable depending on how much
+ *				  padding is needed
+ *
+ * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
+ *				 array[0] = time delta (28 .. 59)
+ *				 size = 8 bytes
+ *
+ * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
+ *				 array[0] = tv_nsec
+ *				 array[1] = tv_sec
+ *				 size = 16 bytes
+ *
+ * @RINGBUF_TYPE_DATA:		Data record
+ *				 If len is zero:
+ *				  array[0] holds the actual length
+ *				  array[1..(length+3)/4-1] holds data
+ *				 else
+ *				  length = len << 2
+ *				  array[0..(length+3)/4] holds data
+ */
+enum ring_buffer_type {
+	RINGBUF_TYPE_PADDING,
+	RINGBUF_TYPE_TIME_EXTEND,
+	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
+	RINGBUF_TYPE_TIME_STAMP,
+	RINGBUF_TYPE_DATA,
+};
+
+unsigned ring_buffer_event_length(struct ring_buffer_event *event);
+void *ring_buffer_event_data(struct ring_buffer_event *event);
+
+/**
+ * ring_buffer_event_time_delta - return the delta timestamp of the event
+ * @event: the event to get the delta timestamp of
+ *
+ * The delta timestamp is the 27 bit timestamp since the last event.
+ */
+static inline unsigned
+ring_buffer_event_time_delta(struct ring_buffer_event *event)
+{
+	return event->time_delta;
+}
+
+void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags);
+void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags);
+
+/*
+ * size is in bytes for each per CPU buffer.
+ */
+struct ring_buffer *
+ring_buffer_alloc(unsigned long size, unsigned flags);
+void ring_buffer_free(struct ring_buffer *buffer);
+
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags);
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags);
+int ring_buffer_write(struct ring_buffer *buffer,
+		      unsigned long length, void *data);
+
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
+
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
+void ring_buffer_read_finish(struct ring_buffer_iter *iter);
+
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
+
+unsigned long ring_buffer_size(struct ring_buffer *buffer);
+
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_reset(struct ring_buffer *buffer);
+
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu);
+
+int ring_buffer_empty(struct ring_buffer *buffer);
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
+
+void ring_buffer_record_disable(struct ring_buffer *buffer);
+void ring_buffer_record_enable(struct ring_buffer *buffer);
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+
+unsigned long ring_buffer_entries(struct ring_buffer *buffer);
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+
+u64 ring_buffer_time_stamp(int cpu);
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+
+enum ring_buffer_flags {
+	RB_FL_OVERWRITE		= 1 << 0,
+};
+
+#endif /* _LINUX_RING_BUFFER_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4feb3c81f94d..396aea11398e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,9 +18,13 @@ config HAVE_FTRACE_MCOUNT_RECORD
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
+	select RING_BUFFER
 	select STACKTRACE
 	select TRACEPOINTS
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 35a07f7cfa86..a85dfba88ba0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..830a2930dd91
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,1672 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>	/* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return sched_clock() << DEBUG_SHIFT;
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+	/* Just stupid testing the normalize function and deltas */
+	*ts >>= DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT	2
+#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA	28
+
+enum {
+	RB_LEN_TIME_EXTEND = 8,
+	RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+	unsigned length;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		/* undefined */
+		return -1;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		return RB_LEN_TIME_EXTEND;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		return RB_LEN_TIME_STAMP;
+
+	case RINGBUF_TYPE_DATA:
+		if (event->len)
+			length = event->len << RB_ALIGNMENT_SHIFT;
+		else
+			length = event->array[0];
+		return length + RB_EVNT_HDR_SIZE;
+	default:
+		BUG();
+	}
+	/* not hit */
+	return 0;
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+	return rb_event_length(event);
+}
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	/* If length is in len field, then array[0] has the data */
+	if (event->len)
+		return (void *)&event->array[0];
+	/* Otherwise length is in array[0] and array[1] has the data */
+	return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+	return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu)		\
+	for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT	27
+#define TS_MASK		((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST	(~TS_MASK)
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+	union {
+		struct {
+			unsigned long	 flags;		/* mandatory */
+			atomic_t	 _count;	/* mandatory */
+			u64		 time_stamp;	/* page time stamp */
+			unsigned	 size;		/* size of page data */
+			struct list_head list;		/* list of free pages */
+		};
+		struct page page;
+	};
+};
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+	if (delta & TS_DELTA_TEST)
+		return 1;
+	return 0;
+}
+
+#define BUF_PAGE_SIZE PAGE_SIZE
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+	int				cpu;
+	struct ring_buffer		*buffer;
+	spinlock_t			lock;
+	struct lock_class_key		lock_key;
+	struct list_head		pages;
+	unsigned long			head;	/* read from head */
+	unsigned long			tail;	/* write to tail */
+	struct buffer_page		*head_page;
+	struct buffer_page		*tail_page;
+	unsigned long			overrun;
+	unsigned long			entries;
+	u64				write_stamp;
+	u64				read_stamp;
+	atomic_t			record_disabled;
+};
+
+struct ring_buffer {
+	unsigned long			size;
+	unsigned			pages;
+	unsigned			flags;
+	int				cpus;
+	cpumask_t			cpumask;
+	atomic_t			record_disabled;
+
+	struct mutex			mutex;
+
+	struct ring_buffer_per_cpu	**buffers;
+};
+
+struct ring_buffer_iter {
+	struct ring_buffer_per_cpu	*cpu_buffer;
+	unsigned long			head;
+	struct buffer_page		*head_page;
+	u64				read_stamp;
+};
+
+#define RB_WARN_ON(buffer, cond)			\
+	if (unlikely(cond)) {				\
+		atomic_inc(&buffer->record_disabled);	\
+		WARN_ON(1);				\
+		return -1;				\
+	}
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safty measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	RB_WARN_ON(cpu_buffer, head->next->prev != head);
+	RB_WARN_ON(cpu_buffer, head->prev->next != head);
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
+		RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
+	}
+
+	return 0;
+}
+
+static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->head_page->size;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	unsigned i;
+
+	for (i = 0; i < nr_pages; i++) {
+		addr = __get_free_page(GFP_KERNEL);
+		if (!addr)
+			goto free_pages;
+		page = (struct buffer_page *)virt_to_page(addr);
+		list_add(&page->list, &pages);
+	}
+
+	list_splice(&pages, head);
+
+	rb_check_pages(cpu_buffer);
+
+	return 0;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int ret;
+
+	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+				  GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpu_buffer)
+		return NULL;
+
+	cpu_buffer->cpu = cpu;
+	cpu_buffer->buffer = buffer;
+	spin_lock_init(&cpu_buffer->lock);
+	INIT_LIST_HEAD(&cpu_buffer->pages);
+
+	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+	if (ret < 0)
+		goto fail_free_buffer;
+
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+
+	return cpu_buffer;
+
+ fail_free_buffer:
+	kfree(cpu_buffer);
+	return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	kfree(cpu_buffer);
+}
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+	struct ring_buffer *buffer;
+	int bsize;
+	int cpu;
+
+	/* keep it in its own cache line */
+	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+			 GFP_KERNEL);
+	if (!buffer)
+		return NULL;
+
+	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	buffer->flags = flags;
+
+	/* need at least two pages */
+	if (buffer->pages == 1)
+		buffer->pages++;
+
+	buffer->cpumask = cpu_possible_map;
+	buffer->cpus = nr_cpu_ids;
+
+	bsize = sizeof(void *) * nr_cpu_ids;
+	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+				  GFP_KERNEL);
+	if (!buffer->buffers)
+		goto fail_free_buffer;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		buffer->buffers[cpu] =
+			rb_allocate_cpu_buffer(buffer, cpu);
+		if (!buffer->buffers[cpu])
+			goto fail_free_buffers;
+	}
+
+	mutex_init(&buffer->mutex);
+
+	return buffer;
+
+ fail_free_buffers:
+	for_each_buffer_cpu(buffer, cpu) {
+		if (buffer->buffers[cpu])
+			rb_free_cpu_buffer(buffer->buffers[cpu]);
+	}
+	kfree(buffer->buffers);
+
+ fail_free_buffer:
+	kfree(buffer);
+	return NULL;
+}
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void
+ring_buffer_free(struct ring_buffer *buffer)
+{
+	int cpu;
+
+	for_each_buffer_cpu(buffer, cpu)
+		rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+	kfree(buffer);
+}
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+	struct buffer_page *page;
+	struct list_head *p;
+	unsigned i;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	for (i = 0; i < nr_pages; i++) {
+		BUG_ON(list_empty(&cpu_buffer->pages));
+		p = cpu_buffer->pages.next;
+		page = list_entry(p, struct buffer_page, list);
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	BUG_ON(list_empty(&cpu_buffer->pages));
+
+	rb_reset_cpu(cpu_buffer);
+
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+
+}
+
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+		struct list_head *pages, unsigned nr_pages)
+{
+	struct buffer_page *page;
+	struct list_head *p;
+	unsigned i;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	for (i = 0; i < nr_pages; i++) {
+		BUG_ON(list_empty(pages));
+		p = pages->next;
+		page = list_entry(p, struct buffer_page, list);
+		list_del_init(&page->list);
+		list_add_tail(&page->list, &cpu_buffer->pages);
+	}
+	rb_reset_cpu(cpu_buffer);
+
+	rb_check_pages(cpu_buffer);
+
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ *  RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns -1 on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned nr_pages, rm_pages, new_pages;
+	struct buffer_page *page, *tmp;
+	unsigned long buffer_size;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	int i, cpu;
+
+	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	size *= BUF_PAGE_SIZE;
+	buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+	/* we need a minimum of two pages */
+	if (size < BUF_PAGE_SIZE * 2)
+		size = BUF_PAGE_SIZE * 2;
+
+	if (size == buffer_size)
+		return size;
+
+	mutex_lock(&buffer->mutex);
+
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+	if (size < buffer_size) {
+
+		/* easy case, just free pages */
+		BUG_ON(nr_pages >= buffer->pages);
+
+		rm_pages = buffer->pages - nr_pages;
+
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			rb_remove_pages(cpu_buffer, rm_pages);
+		}
+		goto out;
+	}
+
+	/*
+	 * This is a bit more difficult. We only want to add pages
+	 * when we can allocate enough for all CPUs. We do this
+	 * by allocating all the pages and storing them on a local
+	 * link list. If we succeed in our allocation, then we
+	 * add these pages to the cpu_buffers. Otherwise we just free
+	 * them all and return -ENOMEM;
+	 */
+	BUG_ON(nr_pages <= buffer->pages);
+	new_pages = nr_pages - buffer->pages;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		for (i = 0; i < new_pages; i++) {
+			addr = __get_free_page(GFP_KERNEL);
+			if (!addr)
+				goto free_pages;
+			page = (struct buffer_page *)virt_to_page(addr);
+			list_add(&page->list, &pages);
+		}
+	}
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		rb_insert_pages(cpu_buffer, &pages, new_pages);
+	}
+
+	BUG_ON(!list_empty(&pages));
+
+ out:
+	buffer->pages = nr_pages;
+	mutex_unlock(&buffer->mutex);
+
+	return size;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		__free_page(&page->page);
+	}
+	return -ENOMEM;
+}
+
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->head_page == cpu_buffer->tail_page &&
+		cpu_buffer->head == cpu_buffer->tail;
+}
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+	return event->type == RINGBUF_TYPE_PADDING;
+}
+
+static inline void *rb_page_index(struct buffer_page *page, unsigned index)
+{
+	void *addr = page_address(&page->page);
+
+	return addr + index;
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_index(cpu_buffer->head_page,
+			     cpu_buffer->head);
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+	return rb_page_index(iter->head_page,
+			     iter->head);
+}
+
+/*
+ * When the tail hits the head and the buffer is in overwrite mode,
+ * the head jumps to the next page and all content on the previous
+ * page is discarded. But before doing so, we update the overrun
+ * variable of the buffer.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	unsigned long head;
+
+	for (head = 0; head < rb_head_size(cpu_buffer);
+	     head += rb_event_length(event)) {
+
+		event = rb_page_index(cpu_buffer->head_page, head);
+		BUG_ON(rb_null_event(event));
+		/* Only count data entries */
+		if (event->type != RINGBUF_TYPE_DATA)
+			continue;
+		cpu_buffer->overrun++;
+		cpu_buffer->entries--;
+	}
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+			       struct buffer_page **page)
+{
+	struct list_head *p = (*page)->list.next;
+
+	if (p == &cpu_buffer->pages)
+		p = p->next;
+
+	*page = list_entry(p, struct buffer_page, list);
+}
+
+static inline void
+rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+{
+	cpu_buffer->tail_page->time_stamp = *ts;
+	cpu_buffer->write_stamp = *ts;
+}
+
+static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
+	cpu_buffer->head = 0;
+}
+
+static void
+rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+{
+	iter->read_stamp = iter->head_page->time_stamp;
+	iter->head = 0;
+}
+
+/**
+ * ring_buffer_update_event - update event type and data
+ * @event: the even to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+			 unsigned type, unsigned length)
+{
+	event->type = type;
+
+	switch (type) {
+
+	case RINGBUF_TYPE_PADDING:
+		break;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		event->len =
+			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		event->len =
+			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+			>> RB_ALIGNMENT_SHIFT;
+		break;
+
+	case RINGBUF_TYPE_DATA:
+		length -= RB_EVNT_HDR_SIZE;
+		if (length > RB_MAX_SMALL_DATA) {
+			event->len = 0;
+			event->array[0] = length;
+		} else
+			event->len =
+				(length + (RB_ALIGNMENT-1))
+				>> RB_ALIGNMENT_SHIFT;
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+	struct ring_buffer_event event; /* Used only for sizeof array */
+
+	/* zero length can cause confusions */
+	if (!length)
+		length = 1;
+
+	if (length > RB_MAX_SMALL_DATA)
+		length += sizeof(event.array[0]);
+
+	length += RB_EVNT_HDR_SIZE;
+	length = ALIGN(length, RB_ALIGNMENT);
+
+	return length;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+		  unsigned type, unsigned long length, u64 *ts)
+{
+	struct buffer_page *head_page, *tail_page;
+	unsigned long tail;
+	struct ring_buffer *buffer = cpu_buffer->buffer;
+	struct ring_buffer_event *event;
+
+	tail_page = cpu_buffer->tail_page;
+	head_page = cpu_buffer->head_page;
+	tail = cpu_buffer->tail;
+
+	if (tail + length > BUF_PAGE_SIZE) {
+		struct buffer_page *next_page = tail_page;
+
+		rb_inc_page(cpu_buffer, &next_page);
+
+		if (next_page == head_page) {
+			if (!(buffer->flags & RB_FL_OVERWRITE))
+				return NULL;
+
+			/* count overflows */
+			rb_update_overflow(cpu_buffer);
+
+			rb_inc_page(cpu_buffer, &head_page);
+			cpu_buffer->head_page = head_page;
+			rb_reset_read_page(cpu_buffer);
+		}
+
+		if (tail != BUF_PAGE_SIZE) {
+			event = rb_page_index(tail_page, tail);
+			/* page padding */
+			event->type = RINGBUF_TYPE_PADDING;
+		}
+
+		tail_page->size = tail;
+		tail_page = next_page;
+		tail_page->size = 0;
+		tail = 0;
+		cpu_buffer->tail_page = tail_page;
+		cpu_buffer->tail = tail;
+		rb_add_stamp(cpu_buffer, ts);
+	}
+
+	BUG_ON(tail + length > BUF_PAGE_SIZE);
+
+	event = rb_page_index(tail_page, tail);
+	rb_update_event(event, type, length);
+
+	return event;
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		  u64 *ts, u64 *delta)
+{
+	struct ring_buffer_event *event;
+	static int once;
+
+	if (unlikely(*delta > (1ULL << 59) && !once++)) {
+		printk(KERN_WARNING "Delta way too big! %llu"
+		       " ts=%llu write stamp = %llu\n",
+		       *delta, *ts, cpu_buffer->write_stamp);
+		WARN_ON(1);
+	}
+
+	/*
+	 * The delta is too big, we to add a
+	 * new timestamp.
+	 */
+	event = __rb_reserve_next(cpu_buffer,
+				  RINGBUF_TYPE_TIME_EXTEND,
+				  RB_LEN_TIME_EXTEND,
+				  ts);
+	if (!event)
+		return -1;
+
+	/* check to see if we went to the next page */
+	if (cpu_buffer->tail) {
+		/* Still on same page, update timestamp */
+		event->time_delta = *delta & TS_MASK;
+		event->array[0] = *delta >> TS_SHIFT;
+		/* commit the time event */
+		cpu_buffer->tail +=
+			rb_event_length(event);
+		cpu_buffer->write_stamp = *ts;
+		*delta = 0;
+	}
+
+	return 0;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+		      unsigned type, unsigned long length)
+{
+	struct ring_buffer_event *event;
+	u64 ts, delta;
+
+	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+	if (cpu_buffer->tail) {
+		delta = ts - cpu_buffer->write_stamp;
+
+		if (test_time_stamp(delta)) {
+			int ret;
+
+			ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+			if (ret < 0)
+				return NULL;
+		}
+	} else {
+		rb_add_stamp(cpu_buffer, &ts);
+		delta = 0;
+	}
+
+	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+	if (!event)
+		return NULL;
+
+	/* If the reserve went to the next page, our delta is zero */
+	if (!cpu_buffer->tail)
+		delta = 0;
+
+	event->time_delta = delta;
+
+	return event;
+}
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags
+ *
+ * Returns a reseverd event on the ring buffer to copy directly to.
+ * The user of this interface will need to get the body to write into
+ * and can use the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+			 unsigned long length,
+			 unsigned long *flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	int cpu;
+
+	if (atomic_read(&buffer->record_disabled))
+		return NULL;
+
+	raw_local_irq_save(*flags);
+	cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out_irq;
+
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock(&cpu_buffer->lock);
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto no_record;
+
+	length = rb_calculate_event_length(length);
+	if (length > BUF_PAGE_SIZE)
+		return NULL;
+
+	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	if (!event)
+		goto no_record;
+
+	return event;
+
+ no_record:
+	spin_unlock(&cpu_buffer->lock);
+ out_irq:
+	local_irq_restore(*flags);
+	return NULL;
+}
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+		      struct ring_buffer_event *event)
+{
+	cpu_buffer->tail += rb_event_length(event);
+	cpu_buffer->tail_page->size = cpu_buffer->tail;
+	cpu_buffer->write_stamp += event->time_delta;
+	cpu_buffer->entries++;
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+			      struct ring_buffer_event *event,
+			      unsigned long flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu = raw_smp_processor_id();
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	assert_spin_locked(&cpu_buffer->lock);
+
+	rb_commit(cpu_buffer, event);
+
+	spin_unlock(&cpu_buffer->lock);
+	raw_local_irq_restore(flags);
+
+	return 0;
+}
+
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+			unsigned long length,
+			void *data)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned long event_length, flags;
+	void *body;
+	int ret = -EBUSY;
+	int cpu;
+
+	if (atomic_read(&buffer->record_disabled))
+		return -EBUSY;
+
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		goto out_irq;
+
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock(&cpu_buffer->lock);
+
+	if (atomic_read(&cpu_buffer->record_disabled))
+		goto out;
+
+	event_length = rb_calculate_event_length(length);
+	event = rb_reserve_next_event(cpu_buffer,
+				      RINGBUF_TYPE_DATA, event_length);
+	if (!event)
+		goto out;
+
+	body = rb_event_data(event);
+
+	memcpy(body, data, length);
+
+	rb_commit(cpu_buffer, event);
+
+	ret = 0;
+ out:
+	spin_unlock(&cpu_buffer->lock);
+ out_irq:
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/**
+ * ring_buffer_lock - lock the ring buffer
+ * @buffer: The ring buffer to lock
+ * @flags: The place to store the interrupt flags
+ *
+ * This locks all the per CPU buffers.
+ *
+ * Must be unlocked by ring_buffer_unlock.
+ */
+void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	local_irq_save(*flags);
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		spin_lock(&cpu_buffer->lock);
+	}
+}
+
+/**
+ * ring_buffer_unlock - unlock a locked buffer
+ * @buffer: The locked buffer to unlock
+ * @flags: The interrupt flags received by ring_buffer_lock
+ */
+void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
+		if (!cpu_isset(cpu, buffer->cpumask))
+			continue;
+		cpu_buffer = buffer->buffers[cpu];
+		spin_unlock(&cpu_buffer->lock);
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+	atomic_inc(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+	atomic_dec(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_inc(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	cpu_buffer = buffer->buffers[cpu];
+	atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->overrun;
+}
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long entries = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		entries += cpu_buffer->entries;
+	}
+
+	return entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long overruns = 0;
+	int cpu;
+
+	/* if you care about this being correct, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		overruns += cpu_buffer->overrun;
+	}
+
+	return overruns;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	iter->head_page = cpu_buffer->head_page;
+	iter->head = cpu_buffer->head;
+	rb_reset_iter_read_page(iter);
+}
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	cpu_buffer = iter->cpu_buffer;
+
+	return iter->head_page == cpu_buffer->tail_page &&
+		iter->head == cpu_buffer->tail;
+}
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		     struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		cpu_buffer->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		cpu_buffer->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+			  struct ring_buffer_event *event)
+{
+	u64 delta;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		return;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		delta = event->array[0];
+		delta <<= TS_SHIFT;
+		delta += event->time_delta;
+		iter->read_stamp += delta;
+		return;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		return;
+
+	case RINGBUF_TYPE_DATA:
+		iter->read_stamp += event->time_delta;
+		return;
+
+	default:
+		BUG();
+	}
+	return;
+}
+
+static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	unsigned length;
+
+	/*
+	 * Check if we are at the end of the buffer.
+	 */
+	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
+		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+		rb_reset_read_page(cpu_buffer);
+		return;
+	}
+
+	event = rb_head_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	length = rb_event_length(event);
+
+	/*
+	 * This should not be called to advance the header if we are
+	 * at the tail of the buffer.
+	 */
+	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
+	       (cpu_buffer->head + length > cpu_buffer->tail));
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	cpu_buffer->head += length;
+
+	/* check for end of page */
+	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
+	    (cpu_buffer->head_page != cpu_buffer->tail_page))
+		rb_advance_head(cpu_buffer);
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+	unsigned length;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+	/*
+	 * Check if we are at the end of the buffer.
+	 */
+	if (iter->head >= iter->head_page->size) {
+		BUG_ON(iter->head_page == cpu_buffer->tail_page);
+		rb_inc_page(cpu_buffer, &iter->head_page);
+		rb_reset_iter_read_page(iter);
+		return;
+	}
+
+	event = rb_iter_head_event(iter);
+
+	length = rb_event_length(event);
+
+	/*
+	 * This should not be called to advance the header if we are
+	 * at the tail of the buffer.
+	 */
+	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
+	       (iter->head + length > cpu_buffer->tail));
+
+	rb_update_iter_read_stamp(iter, event);
+
+	iter->head += length;
+
+	/* check for end of page padding */
+	if ((iter->head >= iter->head_page->size) &&
+	    (iter->head_page != cpu_buffer->tail_page))
+		rb_advance_iter(iter);
+}
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+ again:
+	if (rb_per_cpu_empty(cpu_buffer))
+		return NULL;
+
+	event = rb_head_event(cpu_buffer);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+		rb_reset_read_page(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_head(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_head(cpu_buffer);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = cpu_buffer->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer *buffer;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
+	cpu_buffer = iter->cpu_buffer;
+	buffer = cpu_buffer->buffer;
+
+ again:
+	if (rb_per_cpu_empty(cpu_buffer))
+		return NULL;
+
+	event = rb_iter_head_event(iter);
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		rb_inc_page(cpu_buffer, &iter->head_page);
+		rb_reset_iter_read_page(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		/* Internal data, OK to advance */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		/* FIXME: not implemented */
+		rb_advance_iter(iter);
+		goto again;
+
+	case RINGBUF_TYPE_DATA:
+		if (ts) {
+			*ts = iter->read_stamp + event->time_delta;
+			ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+		}
+		return event;
+
+	default:
+		BUG();
+	}
+
+	return NULL;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning, that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_event *event;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	event = ring_buffer_peek(buffer, cpu, ts);
+	if (!event)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+	rb_advance_head(cpu_buffer);
+
+	return event;
+}
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_iter *iter;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return NULL;
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	iter->cpu_buffer = cpu_buffer;
+
+	atomic_inc(&cpu_buffer->record_disabled);
+	synchronize_sched();
+
+	spin_lock(&cpu_buffer->lock);
+	iter->head = cpu_buffer->head;
+	iter->head_page = cpu_buffer->head_page;
+	rb_reset_iter_read_page(iter);
+	spin_unlock(&cpu_buffer->lock);
+
+	return iter;
+}
+
+/**
+ * ring_buffer_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	atomic_dec(&cpu_buffer->record_disabled);
+	kfree(iter);
+}
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_iter_peek(iter, ts);
+	if (!event)
+		return NULL;
+
+	rb_advance_iter(iter);
+
+	return event;
+}
+
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+	return BUF_PAGE_SIZE * buffer->pages;
+}
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+
+	cpu_buffer->head = cpu_buffer->tail = 0;
+	cpu_buffer->overrun = 0;
+	cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	unsigned long flags;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return;
+
+	raw_local_irq_save(flags);
+	spin_lock(&cpu_buffer->lock);
+
+	rb_reset_cpu(cpu_buffer);
+
+	spin_unlock(&cpu_buffer->lock);
+	raw_local_irq_restore(flags);
+}
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+	unsigned long flags;
+	int cpu;
+
+	ring_buffer_lock(buffer, &flags);
+
+	for_each_buffer_cpu(buffer, cpu)
+		rb_reset_cpu(buffer->buffers[cpu]);
+
+	ring_buffer_unlock(buffer, flags);
+}
+
+/**
+ * rind_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* yes this is racy, but if you don't like the race, lock the buffer */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+		if (!rb_per_cpu_empty(cpu_buffer))
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpu_isset(cpu, buffer->cpumask))
+		return 1;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return rb_per_cpu_empty(cpu_buffer);
+}
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and has another back up buffer lying around.
+ * it is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+			 struct ring_buffer *buffer_b, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer_a;
+	struct ring_buffer_per_cpu *cpu_buffer_b;
+
+	if (!cpu_isset(cpu, buffer_a->cpumask) ||
+	    !cpu_isset(cpu, buffer_b->cpumask))
+		return -EINVAL;
+
+	/* At least make sure the two buffers are somewhat the same */
+	if (buffer_a->size != buffer_b->size ||
+	    buffer_a->pages != buffer_b->pages)
+		return -EINVAL;
+
+	cpu_buffer_a = buffer_a->buffers[cpu];
+	cpu_buffer_b = buffer_b->buffers[cpu];
+
+	/*
+	 * We can't do a synchronize_sched here because this
+	 * function can be called in atomic context.
+	 * Normally this will be called from the same CPU as cpu.
+	 * If not it's up to the caller to protect this.
+	 */
+	atomic_inc(&cpu_buffer_a->record_disabled);
+	atomic_inc(&cpu_buffer_b->record_disabled);
+
+	buffer_a->buffers[cpu] = cpu_buffer_b;
+	buffer_b->buffers[cpu] = cpu_buffer_a;
+
+	cpu_buffer_b->buffer = buffer_a;
+	cpu_buffer_a->buffer = buffer_b;
+
+	atomic_dec(&cpu_buffer_a->record_disabled);
+	atomic_dec(&cpu_buffer_b->record_disabled);
+
+	return 0;
+}
+
-- 
cgit v1.2.3-55-g7522


From a7b1374333407f409cf8df7e623b12490f073c84 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:39 -0400
Subject: ring_buffer: add paranoid check for buffer page

If for some strange reason the buffer_page gets bigger, or the page struct
gets smaller, I want to know this ASAP.  The best way is to not let the
kernel compile.

This patch adds code to test the size of the struct buffer_page against the
page struct and will cause compile issues if the buffer_page ever gets bigger
than the page struct.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 830a2930dd91..95ca9338cb6c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -289,6 +289,12 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
 /**
  * ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes that is needed.
@@ -305,6 +311,11 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	int bsize;
 	int cpu;
 
+	/* Paranoid! Optimizes out when all is well */
+	if (sizeof(struct buffer_page) > sizeof(struct page))
+		ring_buffer_page_too_big();
+
+
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
 			 GFP_KERNEL);
-- 
cgit v1.2.3-55-g7522


From ed56829cb3195de499f97fa6108fe9134319bae6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:40 -0400
Subject: ring_buffer: reset buffer page when freeing

Mathieu Desnoyers pointed out that the freeing of the page frame needs
to be reset otherwise we might trigger BUG_ON in the page free code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 95ca9338cb6c..cfa711374d9a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -127,6 +127,17 @@ struct buffer_page {
 	};
 };
 
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+	reset_page_mapcount(&bpage->page);
+	bpage->page.mapping = NULL;
+	__free_page(&bpage->page);
+}
+
 /*
  * We need to fit the time_stamp delta into 27 bits.
  */
@@ -240,7 +251,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	return -ENOMEM;
 }
@@ -284,7 +295,7 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	kfree(cpu_buffer);
 }
@@ -393,7 +404,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		p = cpu_buffer->pages.next;
 		page = list_entry(p, struct buffer_page, list);
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	BUG_ON(list_empty(&cpu_buffer->pages));
 
@@ -520,7 +531,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, list) {
 		list_del_init(&page->list);
-		__free_page(&page->page);
+		free_buffer_page(page);
 	}
 	return -ENOMEM;
 }
-- 
cgit v1.2.3-55-g7522


From 3928a8a2d98081d1bc3c0a84a2d70e29b90ecf1c Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:41 -0400
Subject: ftrace: make work with new ring buffer

This patch ports ftrace over to the new ring buffer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 932 ++++++++++----------------------------
 kernel/trace/trace.h              |  22 +-
 kernel/trace/trace_boot.c         |  16 +-
 kernel/trace/trace_functions.c    |   2 +-
 kernel/trace/trace_irqsoff.c      |   6 +-
 kernel/trace/trace_mmiotrace.c    |  40 +-
 kernel/trace/trace_nop.c          |   2 +-
 kernel/trace/trace_sched_switch.c |   2 +-
 kernel/trace/trace_sched_wakeup.c |   2 +-
 kernel/trace/trace_selftest.c     |  60 +--
 kernel/trace/trace_sysprof.c      |   2 +-
 11 files changed, 288 insertions(+), 798 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6ada059832a6..ef80793858b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,25 +33,22 @@
 #include <linux/writeback.h>
 
 #include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
 
 #include "trace.h"
 
+#define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
+
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
-static unsigned long __read_mostly	tracing_nr_buffers;
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu_mask(cpu, tracing_buffer_mask)
 
-static int trace_alloc_page(void);
-static int trace_free_page(void);
-
 static int tracing_disabled = 1;
 
-static unsigned long tracing_pages_allocated;
-
 long
 ns2usecs(cycle_t nsec)
 {
@@ -62,7 +59,9 @@ ns2usecs(cycle_t nsec)
 
 cycle_t ftrace_now(int cpu)
 {
-	return cpu_clock(cpu);
+	u64 ts = ring_buffer_time_stamp(cpu);
+	ring_buffer_normalize_time_stamp(cpu, &ts);
+	return ts;
 }
 
 /*
@@ -102,18 +101,18 @@ static int			tracer_enabled = 1;
 int				ftrace_function_enabled;
 
 /*
- * trace_nr_entries is the number of entries that is allocated
- * for a buffer. Note, the number of entries is always rounded
- * to ENTRIES_PER_PAGE.
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
  *
  * This number is purposely set to a low number of 16384.
  * If the dump on oops happens, it will be much appreciated
  * to not have to wait for all that output. Anyway this can be
  * boot time and run time configurable.
  */
-#define TRACE_ENTRIES_DEFAULT	16384UL
+#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
 
-static unsigned long		trace_nr_entries = TRACE_ENTRIES_DEFAULT;
+static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer		*trace_types __read_mostly;
@@ -158,23 +157,21 @@ void trace_wake_up(void)
 		wake_up(&trace_wait);
 }
 
-#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
-
-static int __init set_nr_entries(char *str)
+static int __init set_buf_size(char *str)
 {
-	unsigned long nr_entries;
+	unsigned long buf_size;
 	int ret;
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &nr_entries);
+	ret = strict_strtoul(str, 0, &buf_size);
 	/* nr_entries can not be zero */
-	if (ret < 0 || nr_entries == 0)
+	if (ret < 0 || buf_size == 0)
 		return 0;
-	trace_nr_entries = nr_entries;
+	trace_buf_size = buf_size;
 	return 1;
 }
-__setup("trace_entries=", set_nr_entries);
+__setup("trace_buf_size=", set_buf_size);
 
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
@@ -243,54 +240,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tracing_record_cmdline(current);
 }
 
-#define CHECK_COND(cond)			\
-	if (unlikely(cond)) {			\
-		tracing_disabled = 1;		\
-		WARN_ON(1);			\
-		return -1;			\
-	}
-
-/**
- * check_pages - integrity check of trace buffers
- *
- * As a safty measure we check to make sure the data pages have not
- * been corrupted.
- */
-int check_pages(struct trace_array_cpu *data)
-{
-	struct page *page, *tmp;
-
-	CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
-	CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
-
-	list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
-		CHECK_COND(page->lru.next->prev != &page->lru);
-		CHECK_COND(page->lru.prev->next != &page->lru);
-	}
-
-	return 0;
-}
-
-/**
- * head_page - page address of the first page in per_cpu buffer.
- *
- * head_page returns the page address of the first page in
- * a per_cpu buffer. This also preforms various consistency
- * checks to make sure the buffer has not been corrupted.
- */
-void *head_page(struct trace_array_cpu *data)
-{
-	struct page *page;
-
-	if (list_empty(&data->trace_pages))
-		return NULL;
-
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	BUG_ON(&page->lru == &data->trace_pages);
-
-	return page_address(page);
-}
-
 /**
  * trace_seq_printf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -437,34 +386,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
 	trace_seq_reset(s);
 }
 
-/*
- * flip the trace buffers between two trace descriptors.
- * This usually is the buffers between the global_trace and
- * the max_tr to record a snapshot of a current trace.
- *
- * The ftrace_max_lock must be held.
- */
-static void
-flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
-{
-	struct list_head flip_pages;
-
-	INIT_LIST_HEAD(&flip_pages);
-
-	memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
-		sizeof(struct trace_array_cpu) -
-		offsetof(struct trace_array_cpu, trace_head_idx));
-
-	check_pages(tr1);
-	check_pages(tr2);
-	list_splice_init(&tr1->trace_pages, &flip_pages);
-	list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
-	list_splice_init(&flip_pages, &tr2->trace_pages);
-	BUG_ON(!list_empty(&flip_pages));
-	check_pages(tr1);
-	check_pages(tr2);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -477,17 +398,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data;
-	int i;
+	struct ring_buffer *buf = tr->buffer;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	/* clear out all the previous traces */
-	for_each_tracing_cpu(i) {
-		data = tr->data[i];
-		flip_trace(max_tr.data[i], data);
-		tracing_reset(data);
-	}
+
+	tr->buffer = max_tr.buffer;
+	max_tr.buffer = buf;
+
+	ring_buffer_reset(tr->buffer);
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -504,16 +423,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct trace_array_cpu *data = tr->data[cpu];
-	int i;
+	int ret;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_tracing_cpu(i)
-		tracing_reset(max_tr.data[i]);
 
-	flip_trace(max_tr.data[cpu], data);
-	tracing_reset(data);
+	ring_buffer_reset(max_tr.buffer);
+	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -550,7 +468,6 @@ int register_tracer(struct tracer *type)
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 	if (type->selftest) {
 		struct tracer *saved_tracer = current_trace;
-		struct trace_array_cpu *data;
 		struct trace_array *tr = &global_trace;
 		int saved_ctrl = tr->ctrl;
 		int i;
@@ -562,10 +479,7 @@ int register_tracer(struct tracer *type)
 		 * If we fail, we do not register this tracer.
 		 */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		current_trace = type;
 		tr->ctrl = 0;
@@ -581,10 +495,7 @@ int register_tracer(struct tracer *type)
 		}
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		for_each_tracing_cpu(i) {
-			data = tr->data[i];
-			if (!head_page(data))
-				continue;
-			tracing_reset(data);
+			tracing_reset(tr, i);
 		}
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -630,13 +541,9 @@ void unregister_tracer(struct tracer *type)
 	mutex_unlock(&trace_types_lock);
 }
 
-void tracing_reset(struct trace_array_cpu *data)
+void tracing_reset(struct trace_array *tr, int cpu)
 {
-	data->trace_idx = 0;
-	data->overrun = 0;
-	data->trace_head = data->trace_tail = head_page(data);
-	data->trace_head_idx = 0;
-	data->trace_tail_idx = 0;
+	ring_buffer_reset_cpu(tr->buffer, cpu);
 }
 
 #define SAVED_CMDLINES 128
@@ -722,70 +629,6 @@ void tracing_record_cmdline(struct task_struct *tsk)
 	trace_save_cmdline(tsk);
 }
 
-static inline struct list_head *
-trace_next_list(struct trace_array_cpu *data, struct list_head *next)
-{
-	/*
-	 * Roundrobin - but skip the head (which is not a real page):
-	 */
-	next = next->next;
-	if (unlikely(next == &data->trace_pages))
-		next = next->next;
-	BUG_ON(next == &data->trace_pages);
-
-	return next;
-}
-
-static inline void *
-trace_next_page(struct trace_array_cpu *data, void *addr)
-{
-	struct list_head *next;
-	struct page *page;
-
-	page = virt_to_page(addr);
-
-	next = trace_next_list(data, &page->lru);
-	page = list_entry(next, struct page, lru);
-
-	return page_address(page);
-}
-
-struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
-{
-	unsigned long idx, idx_next;
-	struct trace_entry *entry;
-
-	data->trace_idx++;
-	idx = data->trace_head_idx;
-	idx_next = idx + 1;
-
-	BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
-
-	entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
-
-	if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
-		data->trace_head = trace_next_page(data, data->trace_head);
-		idx_next = 0;
-	}
-
-	if (data->trace_head == data->trace_tail &&
-	    idx_next == data->trace_tail_idx) {
-		/* overrun */
-		data->overrun++;
-		data->trace_tail_idx++;
-		if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-			data->trace_tail =
-				trace_next_page(data, data->trace_tail);
-			data->trace_tail_idx = 0;
-		}
-	}
-
-	data->trace_head_idx = idx_next;
-
-	return entry;
-}
-
 void
 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
@@ -796,7 +639,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	entry->field.preempt_count	= pc & 0xff;
 	entry->field.pid		= (tsk) ? tsk->pid : 0;
-	entry->field.t			= ftrace_now(raw_smp_processor_id());
 	entry->field.flags =
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
@@ -808,18 +650,20 @@ void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_FN;
 	entry->field.fn.ip		= ip;
 	entry->field.fn.parent_ip	= parent_ip;
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
@@ -835,13 +679,19 @@ void __trace_stack(struct trace_array *tr,
 		   unsigned long flags,
 		   int skip)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	struct stack_trace trace;
+	unsigned long irq_flags;
 
 	if (!(trace_flags & TRACE_ITER_STACKTRACE))
 		return;
 
-	entry			= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_STACK;
 
@@ -853,28 +703,31 @@ void __trace_stack(struct trace_array *tr,
 	trace.entries		= entry->field.stack.caller;
 
 	save_stack_trace(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
 __trace_special(void *__tr, void *__data,
 		unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
+	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
 	struct trace_array *tr = __tr;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_SPECIAL;
 	entry->field.special.arg1	= arg1;
 	entry->field.special.arg2	= arg2;
 	entry->field.special.arg3	= arg3;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, irq_flags, 4);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }
@@ -886,12 +739,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 			   struct task_struct *next,
 			   unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_CTX;
 	entry->field.ctx.prev_pid	= prev->pid;
@@ -901,9 +757,8 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->field.ctx.next_prio	= next->prio;
 	entry->field.ctx.next_state	= next->state;
 	entry->field.ctx.next_cpu	= task_cpu(next);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 5);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 }
 
 void
@@ -913,12 +768,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct task_struct *curr,
 			   unsigned long flags)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-	entry			= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_WAKE;
 	entry->field.ctx.prev_pid	= curr->pid;
@@ -928,9 +786,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->field.ctx.next_prio	= wakee->prio;
 	entry->field.ctx.next_state	= wakee->state;
 	entry->field.ctx.next_cpu	= task_cpu(wakee);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 6);
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }
@@ -1011,183 +868,77 @@ enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 };
 
-/* Return the current entry.  */
-static struct trace_entry *
-trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
-		struct trace_iterator *iter, int cpu)
-{
-	struct page *page;
-	struct trace_entry *array;
-
-	if (iter->next_idx[cpu] >= tr->entries ||
-	    iter->next_idx[cpu] >= data->trace_idx ||
-	    (data->trace_head == data->trace_tail &&
-	     data->trace_head_idx == data->trace_tail_idx))
-		return NULL;
-
-	if (!iter->next_page[cpu]) {
-		/* Initialize the iterator for this cpu trace buffer */
-		WARN_ON(!data->trace_tail);
-		page = virt_to_page(data->trace_tail);
-		iter->next_page[cpu] = &page->lru;
-		iter->next_page_idx[cpu] = data->trace_tail_idx;
-	}
-
-	page = list_entry(iter->next_page[cpu], struct page, lru);
-	BUG_ON(&data->trace_pages == &page->lru);
-
-	array = page_address(page);
-
-	WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
-	return &array[iter->next_page_idx[cpu]];
-}
-
-/* Increment the index counter of an iterator by one */
-static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
-{
-	iter->next_idx[cpu]++;
-	iter->next_page_idx[cpu]++;
-
-	if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
-		struct trace_array_cpu *data = iter->tr->data[cpu];
-
-		iter->next_page_idx[cpu] = 0;
-		iter->next_page[cpu] =
-			trace_next_list(data, iter->next_page[cpu]);
-	}
-}
-
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
 	iter->idx++;
-	__trace_iterator_increment(iter, cpu);
+	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 }
 
 static struct trace_entry *
-trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
-		 struct trace_iterator *iter, int cpu)
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 {
-	struct list_head *next_page;
-	struct trace_entry *ent;
-	int idx, next_idx, next_page_idx;
-
-	ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
-
-	if (likely(!ent || ent->type != TRACE_CONT))
-		return ent;
-
-	/* save the iterator details */
-	idx		= iter->idx;
-	next_idx	= iter->next_idx[cpu];
-	next_page_idx	= iter->next_page_idx[cpu];
-	next_page	= iter->next_page[cpu];
-
-	/* find a real entry */
-	do {
-		__trace_iterator_increment(iter, cpu);
-		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
-	} while (ent && ent->type != TRACE_CONT);
-
-	/* reset the iterator */
-	iter->idx			= idx;
-	iter->next_idx[cpu]		= next_idx;
-	iter->next_page_idx[cpu]	= next_page_idx;
-	iter->next_page[cpu]		= next_page;
+	struct ring_buffer_event *event;
+	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	return ent;
+	event = ring_buffer_iter_peek(buf_iter, ts);
+	return event ? ring_buffer_event_data(event) : NULL;
 }
-
 static struct trace_entry *
-__find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	struct trace_array *tr = iter->tr;
+	struct ring_buffer *buffer = iter->tr->buffer;
 	struct trace_entry *ent, *next = NULL;
+	u64 next_ts = 0, ts;
 	int next_cpu = -1;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
-			continue;
 
-		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+		if (ring_buffer_empty_cpu(buffer, cpu))
+			continue;
 
-		if (ent && ent->type == TRACE_CONT) {
-			struct trace_array_cpu *data = tr->data[cpu];
-
-			if (!inc)
-				ent = trace_entry_next(tr, data, iter, cpu);
-			else {
-				while (ent && ent->type == TRACE_CONT) {
-					__trace_iterator_increment(iter, cpu);
-					ent = trace_entry_idx(tr, tr->data[cpu],
-							      iter, cpu);
-				}
-			}
-		}
+		ent = peek_next_entry(iter, cpu, &ts);
 
 		/*
 		 * Pick the entry with the smallest timestamp:
 		 */
-		if (ent && (!next || ent->field.t < next->field.t)) {
+		if (ent && (!next || ts < next_ts)) {
 			next = ent;
 			next_cpu = cpu;
+			next_ts = ts;
 		}
 	}
 
 	if (ent_cpu)
 		*ent_cpu = next_cpu;
 
+	if (ent_ts)
+		*ent_ts = next_ts;
+
 	return next;
 }
 
 /* Find the next real entry, without updating the iterator itself */
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-	return __find_next_entry(iter, ent_cpu, 0);
+	return __find_next_entry(iter, ent_cpu, ent_ts);
 }
 
 /* Find the next real entry, and increment the iterator to the next entry */
 static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-	struct trace_entry *next;
-	int next_cpu = -1;
-
-	next = __find_next_entry(iter, &next_cpu, 1);
-
-	iter->prev_ent = iter->ent;
-	iter->prev_cpu = iter->cpu;
+	iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
 
-	iter->ent = next;
-	iter->cpu = next_cpu;
-
-	if (next)
+	if (iter->ent)
 		trace_iterator_increment(iter, iter->cpu);
 
-	return next ? iter : NULL;
+	return iter->ent ? iter : NULL;
 }
 
 static void trace_consume(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
-	struct trace_entry *ent;
-
- again:
-	data->trace_tail_idx++;
-	if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-		data->trace_tail = trace_next_page(data, data->trace_tail);
-		data->trace_tail_idx = 0;
-	}
-
-	/* Check if we empty it, then reset the index */
-	if (data->trace_head == data->trace_tail &&
-	    data->trace_head_idx == data->trace_tail_idx)
-		data->trace_idx = 0;
-
-	ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
-			      iter, iter->cpu);
-	if (ent && ent->type == TRACE_CONT)
-		goto again;
+	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1220,7 +971,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	struct trace_iterator *iter = m->private;
 	void *p = NULL;
 	loff_t l = 0;
-	int i;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
 
@@ -1239,12 +990,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->ent = NULL;
 		iter->cpu = 0;
 		iter->idx = -1;
-		iter->prev_ent = NULL;
-		iter->prev_cpu = -1;
 
-		for_each_tracing_cpu(i) {
-			iter->next_idx[i] = 0;
-			iter->next_page[i] = NULL;
+		for_each_tracing_cpu(cpu) {
+			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
@@ -1365,23 +1113,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	struct trace_array *tr = iter->tr;
 	struct trace_array_cpu *data = tr->data[tr->cpu];
 	struct tracer *type = current_trace;
-	unsigned long total   = 0;
-	unsigned long entries = 0;
-	int cpu;
+	unsigned long total;
+	unsigned long entries;
 	const char *name = "preemption";
 
 	if (type)
 		name = type->name;
 
-	for_each_tracing_cpu(cpu) {
-		if (head_page(tr->data[cpu])) {
-			total += tr->data[cpu]->trace_idx;
-			if (tr->data[cpu]->trace_idx > tr->entries)
-				entries += tr->entries;
-			else
-				entries += tr->data[cpu]->trace_idx;
-		}
-	}
+	entries = ring_buffer_entries(iter->tr->buffer);
+	total = entries +
+		ring_buffer_overruns(iter->tr->buffer);
 
 	seq_printf(m, "%s latency trace v1.1.5 on %s\n",
 		   name, UTS_RELEASE);
@@ -1468,7 +1209,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 unsigned long preempt_mark_thresh = 100;
 
 static void
-lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
 		    unsigned long rel_usecs)
 {
 	trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1488,12 +1229,10 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
  */
 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
-	struct trace_array *tr = iter->tr;
-	struct trace_array_cpu *data = tr->data[iter->cpu];
 	struct trace_entry *ent;
 	bool ok = true;
 
-	ent = trace_entry_idx(tr, data, iter, iter->cpu);
+	ent = peek_next_entry(iter, iter->cpu, NULL);
 	if (!ent || ent->type != TRACE_CONT) {
 		trace_seq_putc(s, '\n');
 		return;
@@ -1502,8 +1241,8 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	do {
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
-		__trace_iterator_increment(iter, iter->cpu);
-		ent = trace_entry_idx(tr, data, iter, iter->cpu);
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
 
 	if (!ok)
@@ -1515,25 +1254,26 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-	struct trace_entry *next_entry = find_next_entry(iter, NULL);
+	struct trace_entry *next_entry;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
 	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
+	u64 next_ts;
 	char *comm;
 	int S, T;
 	int i;
 	unsigned state;
 
-	if (!next_entry)
-		next_entry = entry;
-
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
-	abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
+	next_entry = find_next_entry(iter, NULL, &next_ts);
+	if (!next_entry)
+		next_ts = iter->ts;
+	rel_usecs = ns2usecs(next_ts - iter->ts);
+	abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
 	if (verbose) {
 		comm = trace_find_cmdline(field->pid);
@@ -1542,7 +1282,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 				 comm,
 				 field->pid, cpu, field->flags,
 				 field->preempt_count, trace_idx,
-				 ns2usecs(field->t),
+				 ns2usecs(iter->ts),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
 				 rel_usecs % 1000);
@@ -1627,7 +1367,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	comm = trace_find_cmdline(iter->ent->field.pid);
 
-	t = ns2usecs(field->t);
+	t = ns2usecs(iter->ts);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
@@ -1732,7 +1472,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	field = &entry->field;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		field->pid, iter->cpu, field->t);
+		field->pid, iter->cpu, iter->ts);
 	if (!ret)
 		return 0;
 
@@ -1811,7 +1551,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 
 	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-	SEQ_PUT_HEX_FIELD_RET(s, field->t);
+	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
 	case TRACE_FN:
@@ -1861,7 +1601,7 @@ static int print_bin_fmt(struct trace_iterator *iter)
 
 	SEQ_PUT_FIELD_RET(s, field->pid);
 	SEQ_PUT_FIELD_RET(s, field->cpu);
-	SEQ_PUT_FIELD_RET(s, field->t);
+	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
 	case TRACE_FN:
@@ -1888,15 +1628,10 @@ static int print_bin_fmt(struct trace_iterator *iter)
 
 static int trace_empty(struct trace_iterator *iter)
 {
-	struct trace_array_cpu *data;
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (head_page(data) && data->trace_idx &&
-		    (data->trace_tail != data->trace_head ||
-		     data->trace_tail_idx != data->trace_head_idx))
+		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
 			return 0;
 	}
 	return 1;
@@ -1961,6 +1696,8 @@ static struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, int *ret)
 {
 	struct trace_iterator *iter;
+	struct seq_file *m;
+	int cpu;
 
 	if (tracing_disabled) {
 		*ret = -ENODEV;
@@ -1981,28 +1718,43 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->trace = current_trace;
 	iter->pos = -1;
 
+	for_each_tracing_cpu(cpu) {
+		iter->buffer_iter[cpu] =
+			ring_buffer_read_start(iter->tr->buffer, cpu);
+		if (!iter->buffer_iter[cpu])
+			goto fail_buffer;
+	}
+
 	/* TODO stop tracer */
 	*ret = seq_open(file, &tracer_seq_ops);
-	if (!*ret) {
-		struct seq_file *m = file->private_data;
-		m->private = iter;
+	if (*ret)
+		goto fail_buffer;
 
-		/* stop the trace while dumping */
-		if (iter->tr->ctrl) {
-			tracer_enabled = 0;
-			ftrace_function_enabled = 0;
-		}
+	m = file->private_data;
+	m->private = iter;
 
-		if (iter->trace && iter->trace->open)
-			iter->trace->open(iter);
-	} else {
-		kfree(iter);
-		iter = NULL;
+	/* stop the trace while dumping */
+	if (iter->tr->ctrl) {
+		tracer_enabled = 0;
+		ftrace_function_enabled = 0;
 	}
+
+	if (iter->trace && iter->trace->open)
+			iter->trace->open(iter);
+
 	mutex_unlock(&trace_types_lock);
 
  out:
 	return iter;
+
+ fail_buffer:
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+	mutex_unlock(&trace_types_lock);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2018,8 +1770,14 @@ int tracing_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct trace_iterator *iter = m->private;
+	int cpu;
 
 	mutex_lock(&trace_types_lock);
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
@@ -2526,6 +2284,7 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
+	int cpu;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -2546,17 +2305,38 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;
 
+	for_each_tracing_cpu(cpu) {
+		iter->buffer_iter[cpu] =
+			ring_buffer_read_start(iter->tr->buffer, cpu);
+		if (!iter->buffer_iter[cpu])
+			goto fail_buffer;
+	}
+
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
+
+ fail_buffer:
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
+	mutex_unlock(&trace_types_lock);
+
+	return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
+	int cpu;
 
+	for_each_tracing_cpu(cpu) {
+		if (iter->buffer_iter[cpu])
+			ring_buffer_read_finish(iter->buffer_iter[cpu]);
+	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
@@ -2592,13 +2372,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	struct trace_array_cpu *data;
-	static cpumask_t mask;
 	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
-	int cpu;
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -2687,32 +2464,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	 * and then release the locks again.
 	 */
 
-	cpus_clear(mask);
-	local_irq_save(flags);
+	local_irq_disable();
 #ifdef CONFIG_FTRACE
 	ftrace_save = ftrace_enabled;
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	for_each_tracing_cpu(cpu) {
-		data = iter->tr->data[cpu];
-
-		if (!head_page(data) || !data->trace_idx)
-			continue;
-
-		atomic_inc(&data->disabled);
-		cpu_set(cpu, mask);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_lock(&data->lock);
-
-		if (data->overrun > iter->last_overrun[cpu])
-			iter->overrun[cpu] +=
-				data->overrun - iter->last_overrun[cpu];
-		iter->last_overrun[cpu] = data->overrun;
-	}
+	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
 		int ret;
@@ -2731,19 +2489,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			break;
 	}
 
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		__raw_spin_unlock(&data->lock);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter->tr->data[cpu];
-		atomic_dec(&data->disabled);
-	}
+	ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
 #endif
-	local_irq_restore(flags);
+	local_irq_enable();
 
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -2776,7 +2526,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
 	unsigned long val;
 	char buf[64];
-	int i, ret;
+	int ret;
 	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
@@ -2804,52 +2554,31 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 		goto out;
 	}
 
-	if (val > global_trace.entries) {
-		long pages_requested;
-		unsigned long freeable_pages;
-
-		/* make sure we have enough memory before mapping */
-		pages_requested =
-			(val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
-
-		/* account for each buffer (and max_tr) */
-		pages_requested *= tracing_nr_buffers * 2;
-
-		/* Check for overflow */
-		if (pages_requested < 0) {
-			cnt = -ENOMEM;
-			goto out;
-		}
-
-		freeable_pages = determine_dirtyable_memory();
-
-		/* we only allow to request 1/4 of useable memory */
-		if (pages_requested >
-		    ((freeable_pages + tracing_pages_allocated) / 4)) {
-			cnt = -ENOMEM;
+	if (val != global_trace.entries) {
+		ret = ring_buffer_resize(global_trace.buffer, val);
+		if (ret < 0) {
+			cnt = ret;
 			goto out;
 		}
 
-		while (global_trace.entries < val) {
-			if (trace_alloc_page()) {
-				cnt = -ENOMEM;
-				goto out;
+		ret = ring_buffer_resize(max_tr.buffer, val);
+		if (ret < 0) {
+			int r;
+			cnt = ret;
+			r = ring_buffer_resize(global_trace.buffer,
+					       global_trace.entries);
+			if (r < 0) {
+				/* AARGH! We are left with different
+				 * size max buffer!!!! */
+				WARN_ON(1);
+				tracing_disabled = 1;
 			}
-			/* double check that we don't go over the known pages */
-			if (tracing_pages_allocated > pages_requested)
-				break;
+			goto out;
 		}
 
-	} else {
-		/* include the number of entries in val (inc of page entries) */
-		while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
-			trace_free_page();
+		global_trace.entries = val;
 	}
 
-	/* check integrity */
-	for_each_tracing_cpu(i)
-		check_pages(global_trace.data[i]);
-
 	filp->f_pos += cnt;
 
 	/* If check pages failed, return ENOMEM */
@@ -3086,10 +2815,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
 
+	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
 	struct trace_entry *entry;
-	unsigned long flags;
+	unsigned long flags, irq_flags;
 	long disabled;
 	int cpu, len = 0, write, written = 0;
 
@@ -3110,8 +2840,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
 
-	__raw_spin_lock(&data->lock);
-	entry				= tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out_unlock;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, flags);
 	entry->type			= TRACE_PRINT;
 	entry->field.print.ip		= ip;
@@ -3121,21 +2854,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	memcpy(&entry->field.print.buf, trace_buf, write);
 	entry->field.print.buf[write] = 0;
 	written = write;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	if (written != len)
 		entry->field.flags |= TRACE_FLAG_CONT;
 
 	while (written != len) {
-		entry = tracing_get_trace_entry(tr, data);
+		event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+						 &irq_flags);
+		if (!event)
+			goto out_unlock;
+		entry	= ring_buffer_event_data(event);
 
 		entry->type = TRACE_CONT;
 		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
 		memcpy(&entry->cont.buf, trace_buf+written, write);
 		entry->cont.buf[write] = 0;
 		written += write;
+		ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	}
-	__raw_spin_unlock(&data->lock);
 
+ out_unlock:
 	spin_unlock(&trace_buf_lock);
 
  out:
@@ -3227,12 +2966,10 @@ void ftrace_dump(void)
 	static DEFINE_SPINLOCK(ftrace_dump_lock);
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
-	struct trace_array_cpu *data;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags;
+	unsigned long flags, irq_flags;
 	int cnt = 0;
-	int cpu;
 
 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3258,25 +2995,7 @@ void ftrace_dump(void)
 
 	cpus_clear(mask);
 
-	for_each_tracing_cpu(cpu) {
-		data = iter.tr->data[cpu];
-
-		if (!head_page(data) || !data->trace_idx)
-			continue;
-
-		atomic_inc(&data->disabled);
-		cpu_set(cpu, mask);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		__raw_spin_lock(&data->lock);
-
-		if (data->overrun > iter.last_overrun[cpu])
-			iter.overrun[cpu] +=
-				data->overrun - iter.last_overrun[cpu];
-		iter.last_overrun[cpu] = data->overrun;
-	}
+	ring_buffer_lock(iter.tr->buffer, &irq_flags);
 
 	while (!trace_empty(&iter)) {
 
@@ -3305,205 +3024,47 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		__raw_spin_unlock(&data->lock);
-	}
-
-	for_each_cpu_mask(cpu, mask) {
-		data = iter.tr->data[cpu];
-		atomic_dec(&data->disabled);
-	}
-
+	ring_buffer_unlock(iter.tr->buffer, irq_flags);
 
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }
 
-static int trace_alloc_page(void)
+__init static int tracer_alloc_buffers(void)
 {
 	struct trace_array_cpu *data;
-	struct page *page, *tmp;
-	LIST_HEAD(pages);
-	void *array;
-	unsigned pages_allocated = 0;
 	int i;
 
-	/* first allocate a page for each CPU */
-	for_each_tracing_cpu(i) {
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
-
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
+	/* TODO: make the number of buffers hot pluggable with CPUS */
+	tracing_buffer_mask = cpu_possible_map;
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_pages;
-		}
-		pages_allocated++;
-		page = virt_to_page(array);
-		list_add(&page->lru, &pages);
-#endif
+	global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+						   TRACE_BUFFER_FLAGS);
+	if (!global_trace.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+		WARN_ON(1);
+		return 0;
 	}
-
-	/* Now that we successfully allocate a page per CPU, add them */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		ClearPageLRU(page);
+	global_trace.entries = ring_buffer_size(global_trace.buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		page = list_entry(pages.next, struct page, lru);
-		list_del_init(&page->lru);
-		list_add_tail(&page->lru, &data->trace_pages);
-		SetPageLRU(page);
-#endif
-	}
-	tracing_pages_allocated += pages_allocated;
-	global_trace.entries += ENTRIES_PER_PAGE;
-
-	return 0;
-
- free_pages:
-	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		list_del_init(&page->lru);
-		__free_page(page);
+	max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+					     TRACE_BUFFER_FLAGS);
+	if (!max_tr.buffer) {
+		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+		WARN_ON(1);
+		ring_buffer_free(global_trace.buffer);
+		return 0;
 	}
-	return -ENOMEM;
-}
-
-static int trace_free_page(void)
-{
-	struct trace_array_cpu *data;
-	struct page *page;
-	struct list_head *p;
-	int i;
-	int ret = 0;
-
-	/* free one page from each buffer */
-	for_each_tracing_cpu(i) {
-		data = global_trace.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		tracing_pages_allocated--;
-		tracing_pages_allocated--;
-		__free_page(page);
-
-		tracing_reset(data);
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		p = data->trace_pages.next;
-		if (p == &data->trace_pages) {
-			/* should never happen */
-			WARN_ON(1);
-			tracing_disabled = 1;
-			ret = -1;
-			break;
-		}
-		page = list_entry(p, struct page, lru);
-		ClearPageLRU(page);
-		list_del(&page->lru);
-		__free_page(page);
-
-		tracing_reset(data);
+	max_tr.entries = ring_buffer_size(max_tr.buffer);
+	WARN_ON(max_tr.entries != global_trace.entries);
 #endif
-	}
-	global_trace.entries -= ENTRIES_PER_PAGE;
-
-	return ret;
-}
-
-__init static int tracer_alloc_buffers(void)
-{
-	struct trace_array_cpu *data;
-	void *array;
-	struct page *page;
-	int pages = 0;
-	int ret = -ENOMEM;
-	int i;
-
-	/* TODO: make the number of buffers hot pluggable with CPUS */
-	tracing_nr_buffers = num_possible_cpus();
-	tracing_buffer_mask = cpu_possible_map;
 
 	/* Allocate the first page for all buffers */
 	for_each_tracing_cpu(i) {
 		data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
 		max_tr.data[i] = &per_cpu(max_data, i);
-
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
-
-		/* set the array to the list */
-		INIT_LIST_HEAD(&data->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &data->trace_pages);
-		/* use the LRU flag to differentiate the two buffers */
-		ClearPageLRU(page);
-
-		data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-		max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-		array = (void *)__get_free_page(GFP_KERNEL);
-		if (array == NULL) {
-			printk(KERN_ERR "tracer: failed to allocate page"
-			       "for trace buffer!\n");
-			goto free_buffers;
-		}
-
-		INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
-		page = virt_to_page(array);
-		list_add(&page->lru, &max_tr.data[i]->trace_pages);
-		SetPageLRU(page);
-#endif
-	}
-
-	/*
-	 * Since we allocate by orders of pages, we may be able to
-	 * round up a bit.
-	 */
-	global_trace.entries = ENTRIES_PER_PAGE;
-	pages++;
-
-	while (global_trace.entries < trace_nr_entries) {
-		if (trace_alloc_page())
-			break;
-		pages++;
 	}
-	max_tr.entries = global_trace.entries;
-
-	pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
-		pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
-	pr_info("   actual entries %ld\n", global_trace.entries);
 
 	trace_init_cmdlines();
 
@@ -3519,38 +3080,13 @@ __init static int tracer_alloc_buffers(void)
 	/* All seems OK, enable tracing */
 	global_trace.ctrl = tracer_enabled;
 	tracing_disabled = 0;
+
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &trace_panic_notifier);
 
 	register_die_notifier(&trace_die_notifier);
 
 	return 0;
-
- free_buffers:
-	for (i-- ; i >= 0; i--) {
-		struct page *page, *tmp;
-		struct trace_array_cpu *data = global_trace.data[i];
-
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-		data = max_tr.data[i];
-		if (data) {
-			list_for_each_entry_safe(page, tmp,
-						 &data->trace_pages, lru) {
-				list_del_init(&page->lru);
-				__free_page(page);
-			}
-		}
-#endif
-	}
-	return ret;
 }
 early_initcall(tracer_alloc_buffers);
 fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b28bf8812efc..f6965f775b43 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,6 +5,7 @@
 #include <asm/atomic.h>
 #include <linux/sched.h>
 #include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 
@@ -102,7 +103,6 @@ struct trace_field {
 	char			flags;
 	char			preempt_count;
 	int			pid;
-	cycle_t			t;
 	union {
 		struct ftrace_entry		fn;
 		struct ctx_switch_entry		ctx;
@@ -139,16 +139,9 @@ struct trace_entry {
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	struct list_head	trace_pages;
 	atomic_t		disabled;
-	raw_spinlock_t		lock;
-	struct lock_class_key	lock_key;
 
 	/* these fields get copied into max-trace: */
-	unsigned		trace_head_idx;
-	unsigned		trace_tail_idx;
-	void			*trace_head; /* producer */
-	void			*trace_tail; /* consumer */
 	unsigned long		trace_idx;
 	unsigned long		overrun;
 	unsigned long		saved_latency;
@@ -172,6 +165,7 @@ struct trace_iterator;
  * They have on/off state as well:
  */
 struct trace_array {
+	struct ring_buffer	*buffer;
 	unsigned long		entries;
 	long			ctrl;
 	int			cpu;
@@ -219,27 +213,21 @@ struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
 	void			*private;
-	long			last_overrun[NR_CPUS];
-	long			overrun[NR_CPUS];
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
 	int			cpu;
-
-	struct trace_entry	*prev_ent;
-	int			prev_cpu;
+	u64			ts;
 
 	unsigned long		iter_flags;
 	loff_t			pos;
-	unsigned long		next_idx[NR_CPUS];
-	struct list_head	*next_page[NR_CPUS];
-	unsigned		next_page_idx[NR_CPUS];
 	long			idx;
 };
 
 void trace_wake_up(void);
-void tracing_reset(struct trace_array_cpu *data);
+void tracing_reset(struct trace_array *tr, int cpu);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d5c9e2e4a9c4..3657eec6b87d 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -34,7 +34,7 @@ static void boot_trace_init(struct trace_array *tr)
 	trace_boot_enabled = 0;
 
 	for_each_cpu_mask(cpu, cpu_possible_map)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void boot_trace_ctrl_update(struct trace_array *tr)
@@ -74,6 +74,7 @@ struct tracer boot_tracer __read_mostly =
 
 void trace_boot(struct boot_trace *it)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	struct trace_array_cpu *data;
 	unsigned long irq_flags;
@@ -85,17 +86,18 @@ void trace_boot(struct boot_trace *it)
 	preempt_disable();
 	data = tr->data[smp_processor_id()];
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry = tracing_get_trace_entry(tr, data);
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type = TRACE_BOOT;
 	entry->field.initcall = *it;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
 	trace_wake_up();
 
+ out:
 	preempt_enable();
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..e90eb0c2c56c 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_function_trace(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..37ad49407f27 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -173,7 +173,7 @@ out_unlock:
 out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
-	tracing_reset(data);
+	tracing_reset(tr, cpu);
 	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
 }
 
@@ -203,7 +203,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	data->critical_start = parent_ip ? : ip;
-	tracing_reset(data);
+	tracing_reset(tr, cpu);
 
 	local_save_flags(flags);
 
@@ -234,7 +234,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!head_page(data)) ||
+	if (unlikely(!data) ||
 	    !data->critical_start || atomic_read(&data->disabled))
 		return;
 
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index a108c326f36e..bdbf09d8413c 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
 {
 	int cpu;
 	unsigned long cnt = 0;
+/* FIXME: */
+#if 0
 	for_each_online_cpu(cpu) {
 		cnt += iter->overrun[cpu];
 		iter->overrun[cpu] = 0;
 	}
+#endif
+	(void)cpu;
 	return cnt;
 }
 
@@ -176,7 +180,7 @@ static int mmio_print_rw(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
@@ -218,7 +222,7 @@ static int mmio_print_map(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_map *m	= &entry->field.mmiomap;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
@@ -250,7 +254,7 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	struct trace_entry *entry = iter->ent;
 	const char *msg		= entry->field.print.buf;
 	struct trace_seq *s	= &iter->seq;
-	unsigned long long t	= ns2usecs(entry->field.t);
+	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret;
@@ -303,19 +307,19 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct mmiotrace_rw *rw)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_MMIO_RW;
 	entry->field.mmiorw		= *rw;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
 }
@@ -331,19 +335,19 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct mmiotrace_map *map)
 {
+	struct ring_buffer_event *event;
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&data->lock);
-
-	entry				= tracing_get_trace_entry(tr, data);
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(entry, 0);
 	entry->type			= TRACE_MMIO_MAP;
 	entry->field.mmiomap		= *map;
-
-	__raw_spin_unlock(&data->lock);
-	raw_local_irq_restore(irq_flags);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
 }
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 16c9ba060bab..4592b4862515 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -30,7 +30,7 @@ static void nop_trace_init(struct trace_array *tr)
 	ctx_trace = tr;
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 
 	if (tr->ctrl)
 		start_nop_trace(tr);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 789e927abc9c..e0b06db0f7af 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -81,7 +81,7 @@ static void sched_switch_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static int tracing_sched_register(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 08206b4e29c4..01e75e0639b7 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -191,7 +191,7 @@ static void __wakeup_reset(struct trace_array *tr)
 
 	for_each_possible_cpu(cpu) {
 		data = tr->data[cpu];
-		tracing_reset(data);
+		tracing_reset(tr, cpu);
 	}
 
 	wakeup_cpu = -1;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 5ebd4b135498..09cf230d7eca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -18,58 +18,20 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	return 0;
 }
 
-static int
-trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
-	struct trace_entry *entries;
-	struct page *page;
-	int idx = 0;
-	int i;
+	struct ring_buffer_event *event;
+	struct trace_entry *entry;
 
-	BUG_ON(list_empty(&data->trace_pages));
-	page = list_entry(data->trace_pages.next, struct page, lru);
-	entries = page_address(page);
+	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+		entry = ring_buffer_event_data(event);
 
-	check_pages(data);
-	if (head_page(data) != entries)
-		goto failed;
-
-	/*
-	 * The starting trace buffer always has valid elements,
-	 * if any element exists.
-	 */
-	entries = head_page(data);
-
-	for (i = 0; i < tr->entries; i++) {
-
-		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+		if (!trace_valid_entry(entry)) {
 			printk(KERN_CONT ".. invalid entry %d ",
-				entries[idx].type);
+				entry->type);
 			goto failed;
 		}
-
-		idx++;
-		if (idx >= ENTRIES_PER_PAGE) {
-			page = virt_to_page(entries);
-			if (page->lru.next == &data->trace_pages) {
-				if (i != tr->entries - 1) {
-					printk(KERN_CONT ".. entries buffer mismatch");
-					goto failed;
-				}
-			} else {
-				page = list_entry(page->lru.next, struct page, lru);
-				entries = page_address(page);
-			}
-			idx = 0;
-		}
-	}
-
-	page = virt_to_page(entries);
-	if (page->lru.next != &data->trace_pages) {
-		printk(KERN_CONT ".. too many entries");
-		goto failed;
 	}
-
 	return 0;
 
  failed:
@@ -91,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 	/* Don't allow flipping of max traces now */
 	raw_local_irq_save(flags);
 	__raw_spin_lock(&ftrace_max_lock);
-	for_each_possible_cpu(cpu) {
-		if (!head_page(tr->data[cpu]))
-			continue;
 
-		cnt += tr->data[cpu]->trace_idx;
+	cnt = ring_buffer_entries(tr->buffer);
 
-		ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+	for_each_possible_cpu(cpu) {
+		ret = trace_test_buffer_cpu(tr, cpu);
 		if (ret)
 			break;
 	}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a135..9587d3bcba55 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		tracing_reset(tr->data[cpu]);
+		tracing_reset(tr, cpu);
 }
 
 static void start_stack_trace(struct trace_array *tr)
-- 
cgit v1.2.3-55-g7522


From 777e208d40d0953efc6fb4ab58590da3f7d8f02d Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 29 Sep 2008 23:02:42 -0400
Subject: ftrace: take advantage of variable length entries

Now that the underlining ring buffer for ftrace now hold variable length
entries, we can take advantage of this by only storing the size of the
actual event into the buffer. This happens to increase the number of
entries in the buffer dramatically.

We can also get rid of the "trace_cont" operation, but I'm keeping that
until we have no more users. Some of the ftrace tracers can now change
their code to adapt to this new feature.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 439 ++++++++++++++++++++++-------------------
 kernel/trace/trace.h           |  81 ++++----
 kernel/trace/trace_boot.c      |  13 +-
 kernel/trace/trace_mmiotrace.c |  31 +--
 4 files changed, 301 insertions(+), 263 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ef80793858b8..ed9e47c18810 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -637,9 +637,9 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	pc = preempt_count();
 
-	entry->field.preempt_count	= pc & 0xff;
-	entry->field.pid		= (tsk) ? tsk->pid : 0;
-	entry->field.flags =
+	entry->preempt_count		= pc & 0xff;
+	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->flags =
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
@@ -651,7 +651,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -659,10 +659,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_FN;
-	entry->field.fn.ip		= ip;
-	entry->field.fn.parent_ip	= parent_ip;
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_FN;
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
@@ -680,7 +680,7 @@ void __trace_stack(struct trace_array *tr,
 		   int skip)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct stack_entry *entry;
 	struct stack_trace trace;
 	unsigned long irq_flags;
 
@@ -692,15 +692,15 @@ void __trace_stack(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_STACK;
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type		= TRACE_STACK;
 
-	memset(&entry->field.stack, 0, sizeof(entry->field.stack));
+	memset(&entry->caller, 0, sizeof(entry->caller));
 
 	trace.nr_entries	= 0;
 	trace.max_entries	= FTRACE_STACK_ENTRIES;
 	trace.skip		= skip;
-	trace.entries		= entry->field.stack.caller;
+	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
@@ -713,7 +713,7 @@ __trace_special(void *__tr, void *__data,
 	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
 	struct trace_array *tr = __tr;
-	struct trace_entry *entry;
+	struct special_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -721,11 +721,11 @@ __trace_special(void *__tr, void *__data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_SPECIAL;
-	entry->field.special.arg1	= arg1;
-	entry->field.special.arg2	= arg2;
-	entry->field.special.arg3	= arg3;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_SPECIAL;
+	entry->arg1			= arg1;
+	entry->arg2			= arg2;
+	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, irq_flags, 4);
 
@@ -740,7 +740,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 			   unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -748,15 +748,15 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_CTX;
-	entry->field.ctx.prev_pid	= prev->pid;
-	entry->field.ctx.prev_prio	= prev->prio;
-	entry->field.ctx.prev_state	= prev->state;
-	entry->field.ctx.next_pid	= next->pid;
-	entry->field.ctx.next_prio	= next->prio;
-	entry->field.ctx.next_state	= next->state;
-	entry->field.ctx.next_cpu	= task_cpu(next);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_CTX;
+	entry->prev_pid			= prev->pid;
+	entry->prev_prio		= prev->prio;
+	entry->prev_state		= prev->state;
+	entry->next_pid			= next->pid;
+	entry->next_prio		= next->prio;
+	entry->next_state		= next->state;
+	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 5);
 }
@@ -769,7 +769,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   unsigned long flags)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct ctx_switch_entry *entry;
 	unsigned long irq_flags;
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -777,15 +777,15 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type		= TRACE_WAKE;
-	entry->field.ctx.prev_pid	= curr->pid;
-	entry->field.ctx.prev_prio	= curr->prio;
-	entry->field.ctx.prev_state	= curr->state;
-	entry->field.ctx.next_pid	= wakee->pid;
-	entry->field.ctx.next_prio	= wakee->prio;
-	entry->field.ctx.next_state	= wakee->state;
-	entry->field.ctx.next_cpu	= task_cpu(wakee);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_WAKE;
+	entry->prev_pid			= curr->pid;
+	entry->prev_prio		= curr->prio;
+	entry->prev_state		= curr->state;
+	entry->next_pid			= wakee->pid;
+	entry->next_prio		= wakee->prio;
+	entry->next_state		= wakee->state;
+	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	__trace_stack(tr, data, flags, 6);
 
@@ -1173,20 +1173,19 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 static void
 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 {
-	struct trace_field *field = &entry->field;
 	int hardirq, softirq;
 	char *comm;
 
-	comm = trace_find_cmdline(field->pid);
+	comm = trace_find_cmdline(entry->pid);
 
-	trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
+	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
 	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
-			(field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
-			((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
 
-	hardirq = field->flags & TRACE_FLAG_HARDIRQ;
-	softirq = field->flags & TRACE_FLAG_SOFTIRQ;
+	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
 	if (hardirq && softirq) {
 		trace_seq_putc(s, 'H');
 	} else {
@@ -1200,8 +1199,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 		}
 	}
 
-	if (field->preempt_count)
-		trace_seq_printf(s, "%x", field->preempt_count);
+	if (entry->preempt_count)
+		trace_seq_printf(s, "%x", entry->preempt_count);
 	else
 		trace_seq_puts(s, ".");
 }
@@ -1230,6 +1229,7 @@ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 {
 	struct trace_entry *ent;
+	struct trace_field_cont *cont;
 	bool ok = true;
 
 	ent = peek_next_entry(iter, iter->cpu, NULL);
@@ -1239,8 +1239,9 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 	}
 
 	do {
+		cont = (struct trace_field_cont *)ent;
 		if (ok)
-			ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
+			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
 		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
@@ -1257,7 +1258,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	struct trace_entry *next_entry;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
-	struct trace_field *field = &entry->field;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
 	u64 next_ts;
@@ -1276,12 +1276,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
 	if (verbose) {
-		comm = trace_find_cmdline(field->pid);
+		comm = trace_find_cmdline(entry->pid);
 		trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
 				 " %ld.%03ldms (+%ld.%03ldms): ",
 				 comm,
-				 field->pid, cpu, field->flags,
-				 field->preempt_count, trace_idx,
+				 entry->pid, cpu, entry->flags,
+				 entry->preempt_count, trace_idx,
 				 ns2usecs(iter->ts),
 				 abs_usecs/1000,
 				 abs_usecs % 1000, rel_usecs/1000,
@@ -1291,53 +1291,69 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		lat_print_timestamp(s, abs_usecs, rel_usecs);
 	}
 	switch (entry->type) {
-	case TRACE_FN:
-		seq_print_ip_sym(s, field->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(field->fn.parent_ip))
+		if (kretprobed(field->parent_ip))
 			trace_seq_puts(s, KRETPROBE_MSG);
 		else
-			seq_print_ip_sym(s, field->fn.parent_ip, sym_flags);
+			seq_print_ip_sym(s, field->parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 
-		state = field->ctx.prev_state ?
-			__ffs(field->ctx.prev_state) + 1 : 0;
+		state = field->prev_state ?
+			__ffs(field->prev_state) + 1 : 0;
 		S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-		comm = trace_find_cmdline(field->ctx.next_pid);
+		comm = trace_find_cmdline(field->next_pid);
 		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
-				 field->ctx.prev_pid,
-				 field->ctx.prev_prio,
+				 field->prev_pid,
+				 field->prev_prio,
 				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 field->ctx.next_cpu,
-				 field->ctx.next_pid,
-				 field->ctx.next_prio,
+				 field->next_cpu,
+				 field->next_pid,
+				 field->next_prio,
 				 T, comm);
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field = (struct stack_entry *)entry;
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
 				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, field->stack.caller[i], sym_flags);
+			seq_print_ip_sym(s, field->caller[i], sym_flags);
 		}
 		trace_seq_puts(s, "\n");
 		break;
-	case TRACE_PRINT:
-		seq_print_ip_sym(s, field->print.ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1349,7 +1365,6 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *entry;
-	struct trace_field *field;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
@@ -1363,15 +1378,13 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	comm = trace_find_cmdline(iter->ent->field.pid);
+	comm = trace_find_cmdline(iter->ent->pid);
 
 	t = ns2usecs(iter->ts);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
-	ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
+	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 	if (!ret)
 		return 0;
 	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
@@ -1382,20 +1395,22 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		return 0;
 
 	switch (entry->type) {
-	case TRACE_FN:
-		ret = seq_print_ip_sym(s, field->fn.ip, sym_flags);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
 			return 0;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						field->fn.parent_ip) {
+						field->parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return 0;
-			if (kretprobed(field->fn.parent_ip))
+			if (kretprobed(field->parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
 				ret = seq_print_ip_sym(s,
-						       field->fn.parent_ip,
+						       field->parent_ip,
 						       sym_flags);
 			if (!ret)
 				return 0;
@@ -1404,40 +1419,50 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
-				       field->ctx.prev_pid,
-				       field->ctx.prev_prio,
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
 				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       field->ctx.next_cpu,
-				       field->ctx.next_pid,
-				       field->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_SPECIAL:
+	}
+	case TRACE_SPECIAL: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_STACK:
+	}
+	case TRACE_STACK: {
+		struct stack_entry *field = (struct stack_entry *)entry;
+
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i) {
 				ret = trace_seq_puts(s, " <= ");
 				if (!ret)
 					return 0;
 			}
-			ret = seq_print_ip_sym(s, field->stack.caller[i],
+			ret = seq_print_ip_sym(s, field->caller[i],
 					       sym_flags);
 			if (!ret)
 				return 0;
@@ -1446,13 +1471,17 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_PRINT:
-		seq_print_ip_sym(s, field->print.ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		seq_print_ip_sym(s, field->ip, sym_flags);
+		trace_seq_printf(s, ": %s", field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -1460,7 +1489,6 @@ static int print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
-	struct trace_field *field;
 	int ret;
 	int S, T;
 
@@ -1469,56 +1497,66 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
 	ret = trace_seq_printf(s, "%d %d %llu ",
-		field->pid, iter->cpu, iter->ts);
+		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
 		return 0;
 
 	switch (entry->type) {
-	case TRACE_FN:
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
 		ret = trace_seq_printf(s, "%x %x\n",
-					field->fn.ip,
-					field->fn.parent_ip);
+					field->ip,
+					field->parent_ip);
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
 		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
-				       field->ctx.prev_pid,
-				       field->ctx.prev_prio,
+				       field->prev_pid,
+				       field->prev_prio,
 				       S,
-				       field->ctx.next_cpu,
-				       field->ctx.next_pid,
-				       field->ctx.next_prio,
+				       field->next_cpu,
+				       field->next_pid,
+				       field->next_prio,
 				       T);
 		if (!ret)
 			return 0;
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->special.arg1,
-				 field->special.arg2,
-				 field->special.arg3);
+				 field->arg1,
+				 field->arg2,
+				 field->arg3);
 		if (!ret)
 			return 0;
 		break;
-	case TRACE_PRINT:
-		trace_seq_printf(s, "# %lx %s",
-				 field->print.ip, field->print.buf);
-		if (field->flags & TRACE_FLAG_CONT)
+	}
+	case TRACE_PRINT: {
+		struct print_entry *field = (struct print_entry *)entry;
+
+		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+		if (entry->flags & TRACE_FLAG_CONT)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -1539,7 +1577,6 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
 	struct trace_entry *entry;
-	struct trace_field *field;
 	int S, T;
 
 	entry = iter->ent;
@@ -1547,40 +1584,48 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	SEQ_PUT_HEX_FIELD_RET(s, field->pid);
+	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip);
-		SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
 		break;
+	}
 	case TRACE_CTX:
-	case TRACE_WAKE:
-		S = field->ctx.prev_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.prev_state] : 'X';
-		T = field->ctx.next_state < sizeof(state_to_char) ?
-			state_to_char[field->ctx.next_state] : 'X';
+	case TRACE_WAKE: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		S = field->prev_state < sizeof(state_to_char) ?
+			state_to_char[field->prev_state] : 'X';
+		T = field->next_state < sizeof(state_to_char) ?
+			state_to_char[field->next_state] : 'X';
 		if (entry->type == TRACE_WAKE)
 			S = '+';
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
 		SEQ_PUT_HEX_FIELD_RET(s, T);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+		SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
 	return 1;
@@ -1590,39 +1635,46 @@ static int print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
-	struct trace_field *field;
 
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
 		return 1;
 
-	field = &entry->field;
-
-	SEQ_PUT_FIELD_RET(s, field->pid);
-	SEQ_PUT_FIELD_RET(s, field->cpu);
+	SEQ_PUT_FIELD_RET(s, entry->pid);
+	SEQ_PUT_FIELD_RET(s, iter->cpu);
 	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
-	case TRACE_FN:
-		SEQ_PUT_FIELD_RET(s, field->fn.ip);
-		SEQ_PUT_FIELD_RET(s, field->fn.parent_ip);
+	case TRACE_FN: {
+		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->ip);
+		SEQ_PUT_FIELD_RET(s, field->parent_ip);
 		break;
-	case TRACE_CTX:
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid);
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio);
-		SEQ_PUT_FIELD_RET(s, field->ctx.prev_state);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_pid);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_prio);
-		SEQ_PUT_FIELD_RET(s, field->ctx.next_state);
+	}
+	case TRACE_CTX: {
+		struct ctx_switch_entry *field =
+			(struct ctx_switch_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->prev_pid);
+		SEQ_PUT_FIELD_RET(s, field->prev_prio);
+		SEQ_PUT_FIELD_RET(s, field->prev_state);
+		SEQ_PUT_FIELD_RET(s, field->next_pid);
+		SEQ_PUT_FIELD_RET(s, field->next_prio);
+		SEQ_PUT_FIELD_RET(s, field->next_state);
 		break;
+	}
 	case TRACE_SPECIAL:
-	case TRACE_STACK:
-		SEQ_PUT_FIELD_RET(s, field->special.arg1);
-		SEQ_PUT_FIELD_RET(s, field->special.arg2);
-		SEQ_PUT_FIELD_RET(s, field->special.arg3);
+	case TRACE_STACK: {
+		struct special_entry *field = (struct special_entry *)entry;
+
+		SEQ_PUT_FIELD_RET(s, field->arg1);
+		SEQ_PUT_FIELD_RET(s, field->arg2);
+		SEQ_PUT_FIELD_RET(s, field->arg3);
 		break;
 	}
+	}
 	return 1;
 }
 
@@ -2818,10 +2870,10 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	struct trace_entry *entry;
+	struct print_entry *entry;
 	unsigned long flags, irq_flags;
 	long disabled;
-	int cpu, len = 0, write, written = 0;
+	int cpu, len = 0, size;
 
 	if (!tr->ctrl || tracing_disabled)
 		return 0;
@@ -2840,40 +2892,19 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	len = min(len, TRACE_BUF_SIZE-1);
 	trace_buf[len] = 0;
 
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
-					 &irq_flags);
+	size = sizeof(*entry) + len + 1;
+	event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
 	if (!event)
 		goto out_unlock;
-	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, flags);
-	entry->type			= TRACE_PRINT;
-	entry->field.print.ip		= ip;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags);
+	entry->ent.type			= TRACE_PRINT;
+	entry->ip			= ip;
 
-	write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1));
-
-	memcpy(&entry->field.print.buf, trace_buf, write);
-	entry->field.print.buf[write] = 0;
-	written = write;
+	memcpy(&entry->buf, trace_buf, len);
+	entry->buf[len] = 0;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
-	if (written != len)
-		entry->field.flags |= TRACE_FLAG_CONT;
-
-	while (written != len) {
-		event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
-						 &irq_flags);
-		if (!event)
-			goto out_unlock;
-		entry	= ring_buffer_event_data(event);
-
-		entry->type = TRACE_CONT;
-		write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
-		memcpy(&entry->cont.buf, trace_buf+written, write);
-		entry->cont.buf[write] = 0;
-		written += write;
-		ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	}
-
  out_unlock:
 	spin_unlock(&trace_buf_lock);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f6965f775b43..e541a6b7e312 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -26,10 +26,25 @@ enum trace_type {
 	__TRACE_LAST_TYPE
 };
 
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned char		type;
+	unsigned char		cpu;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+};
+
 /*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
+	struct trace_entry	ent;
 	unsigned long		ip;
 	unsigned long		parent_ip;
 };
@@ -39,6 +54,7 @@ extern struct tracer boot_tracer;
  * Context switch trace entry - which task (and prio) we switched from/to:
  */
 struct ctx_switch_entry {
+	struct trace_entry	ent;
 	unsigned int		prev_pid;
 	unsigned char		prev_prio;
 	unsigned char		prev_state;
@@ -52,6 +68,7 @@ struct ctx_switch_entry {
  * Special (free-form) trace entry:
  */
 struct special_entry {
+	struct trace_entry	ent;
 	unsigned long		arg1;
 	unsigned long		arg2;
 	unsigned long		arg3;
@@ -64,6 +81,7 @@ struct special_entry {
 #define FTRACE_STACK_ENTRIES	8
 
 struct stack_entry {
+	struct trace_entry	ent;
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
@@ -71,10 +89,34 @@ struct stack_entry {
  * ftrace_printk entry:
  */
 struct print_entry {
+	struct trace_entry	ent;
 	unsigned long		ip;
 	char			buf[];
 };
 
+#define TRACE_OLD_SIZE		88
+
+struct trace_field_cont {
+	unsigned char		type;
+	/* Temporary till we get rid of this completely */
+	char			buf[TRACE_OLD_SIZE - 1];
+};
+
+struct trace_mmiotrace_rw {
+	struct trace_entry	ent;
+	struct mmiotrace_rw	rw;
+};
+
+struct trace_mmiotrace_map {
+	struct trace_entry	ent;
+	struct mmiotrace_map	map;
+};
+
+struct trace_boot {
+	struct trace_entry	ent;
+	struct boot_trace	initcall;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -92,46 +134,7 @@ enum trace_flag_type {
 	TRACE_FLAG_CONT			= 0x10,
 };
 
-/*
- * The trace field - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_field {
-	char			cpu;
-	char			flags;
-	char			preempt_count;
-	int			pid;
-	union {
-		struct ftrace_entry		fn;
-		struct ctx_switch_entry		ctx;
-		struct special_entry		special;
-		struct stack_entry		stack;
-		struct print_entry		print;
-		struct mmiotrace_rw		mmiorw;
-		struct mmiotrace_map		mmiomap;
-		struct boot_trace		initcall;
-	};
-};
-
-struct trace_field_cont {
-	char				buf[sizeof(struct trace_field)];
-};
-
-struct trace_entry {
-	char 				type;
-	union {
-		struct trace_field	field;
-		struct trace_field_cont	cont;
-	};
-};
-
-#define TRACE_ENTRY_SIZE	sizeof(struct trace_entry)
 #define TRACE_BUF_SIZE		1024
-#define TRACE_PRINT_BUF_SIZE \
-	(sizeof(struct trace_field) - offsetof(struct trace_field, print.buf))
-#define TRACE_CONT_BUF_SIZE	sizeof(struct trace_field)
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 3657eec6b87d..fa8cca1be115 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -49,10 +49,11 @@ static int initcall_print_line(struct trace_iterator *iter)
 {
 	int ret = 0;
 	struct trace_entry *entry = iter->ent;
-	struct boot_trace *it = &entry->field.initcall;
+	struct trace_boot *field = (struct trace_boot *)entry;
+	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
 
-	if (iter->ent->type == TRACE_BOOT)
+	if (entry->type == TRACE_BOOT)
 		ret = trace_seq_printf(s, "%pF called from %i "
 				       "returned %d after %lld msecs\n",
 				       it->func, it->caller, it->result,
@@ -75,7 +76,7 @@ struct tracer boot_tracer __read_mostly =
 void trace_boot(struct boot_trace *it)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_boot *entry;
 	struct trace_array_cpu *data;
 	unsigned long irq_flags;
 	struct trace_array *tr = boot_trace;
@@ -91,9 +92,9 @@ void trace_boot(struct boot_trace *it)
 	if (!event)
 		goto out;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type = TRACE_BOOT;
-	entry->field.initcall = *it;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type = TRACE_BOOT;
+	entry->initcall = *it;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index bdbf09d8413c..3df441ea2749 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -178,14 +178,16 @@ print_out:
 static int mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_rw *rw	= &entry->field.mmiorw;
+	struct trace_mmiotrace_rw *field =
+		(struct trace_mmiotrace_rw *)entry;
+	struct mmiotrace_rw *rw	= &field->rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->field.mmiorw.opcode) {
+	switch (rw->opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
 			"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -220,14 +222,14 @@ static int mmio_print_rw(struct trace_iterator *iter)
 static int mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= &entry->field.mmiomap;
+	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
-	switch (entry->field.mmiorw.opcode) {
+	switch (m->opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
 			"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -252,7 +254,8 @@ static int mmio_print_map(struct trace_iterator *iter)
 static int mmio_print_mark(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	const char *msg		= entry->field.print.buf;
+	struct print_entry *print = (struct print_entry *)entry;
+	const char *msg		= print->buf;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
@@ -264,7 +267,7 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	if (!ret)
 		return 0;
 
-	if (entry->field.flags & TRACE_FLAG_CONT)
+	if (entry->flags & TRACE_FLAG_CONT)
 		trace_seq_print_cont(s, iter);
 
 	return 1;
@@ -308,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 				struct mmiotrace_rw *rw)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_mmiotrace_rw *entry;
 	unsigned long irq_flags;
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -316,9 +319,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_RW;
-	entry->field.mmiorw		= *rw;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_MMIO_RW;
+	entry->rw			= *rw;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
@@ -336,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 				struct mmiotrace_map *map)
 {
 	struct ring_buffer_event *event;
-	struct trace_entry *entry;
+	struct trace_mmiotrace_map *entry;
 	unsigned long irq_flags;
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
@@ -344,9 +347,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(entry, 0);
-	entry->type			= TRACE_MMIO_MAP;
-	entry->field.mmiomap		= *map;
+	tracing_generic_entry_update(&entry->ent, 0);
+	entry->ent.type			= TRACE_MMIO_MAP;
+	entry->map			= *map;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
-- 
cgit v1.2.3-55-g7522


From 2c4f035f6c3e8fda661eb6105aa51ef07aa71607 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 29 Sep 2008 20:18:34 +0200
Subject: tracing/ftrace: change the type of the print_line callback

We need a kind of disambiguation when a print_line callback
returns 0.

_There is not enough space to print all the entry.
 Please flush the seq and retry.
_I can't handle this type of entry

This patch changes the type of this callback for better information.

Also some changes have been made in this V2.

_ Only relay to default functions after the print_line callback fails.
_ This patch doesn't fix the issue with the broken pipe (see patch 2/4 for that)

Some things are still in discussion:

_ Find better names for the enum print_line_t values
_ Change the type of print_trace_line into boolean.

Patches to change that can be sent later.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 77 ++++++++++++++++++++++++++++------------------------
 kernel/trace/trace.h | 10 ++++++-
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed9e47c18810..b38a4bb40548 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1250,7 +1250,7 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		trace_seq_putc(s, '\n');
 }
 
-static int
+static enum print_line_t
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1267,7 +1267,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	unsigned state;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	next_entry = find_next_entry(iter, NULL, &next_ts);
 	if (!next_entry)
@@ -1357,10 +1357,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_fmt(struct trace_iterator *iter)
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1376,7 +1376,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	comm = trace_find_cmdline(iter->ent->pid);
 
@@ -1386,13 +1386,13 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 	ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 	ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 	ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
 	case TRACE_FN: {
@@ -1400,12 +1400,12 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
 		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
 						field->parent_ip) {
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 			if (kretprobed(field->parent_ip))
 				ret = trace_seq_puts(s, KRETPROBE_MSG);
 			else
@@ -1413,11 +1413,11 @@ static int print_trace_fmt(struct trace_iterator *iter)
 						       field->parent_ip,
 						       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_printf(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_CTX:
@@ -1439,7 +1439,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_SPECIAL: {
@@ -1450,7 +1450,7 @@ static int print_trace_fmt(struct trace_iterator *iter)
 				 field->arg2,
 				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_STACK: {
@@ -1460,16 +1460,16 @@ static int print_trace_fmt(struct trace_iterator *iter)
 			if (i) {
 				ret = trace_seq_puts(s, " <= ");
 				if (!ret)
-					return 0;
+					return TRACE_TYPE_PARTIAL_LINE;
 			}
 			ret = seq_print_ip_sym(s, field->caller[i],
 					       sym_flags);
 			if (!ret)
-				return 0;
+				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		ret = trace_seq_puts(s, "\n");
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_PRINT: {
@@ -1482,10 +1482,10 @@ static int print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_raw_fmt(struct trace_iterator *iter)
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
@@ -1495,12 +1495,12 @@ static int print_raw_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	ret = trace_seq_printf(s, "%d %d %llu ",
 		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (entry->type) {
 	case TRACE_FN: {
@@ -1510,7 +1510,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 					field->ip,
 					field->parent_ip);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_CTX:
@@ -1533,7 +1533,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 				       field->next_prio,
 				       T);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_SPECIAL:
@@ -1545,7 +1545,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 				 field->arg2,
 				 field->arg3);
 		if (!ret)
-			return 0;
+			return TRACE_TYPE_PARTIAL_LINE;
 		break;
 	}
 	case TRACE_PRINT: {
@@ -1557,7 +1557,7 @@ static int print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
 #define SEQ_PUT_FIELD_RET(s, x)				\
@@ -1572,7 +1572,7 @@ do {							\
 		return 0;				\
 } while (0)
 
-static int print_hex_fmt(struct trace_iterator *iter)
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
@@ -1582,7 +1582,7 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
@@ -1628,10 +1628,10 @@ static int print_hex_fmt(struct trace_iterator *iter)
 	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_bin_fmt(struct trace_iterator *iter)
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
@@ -1639,7 +1639,7 @@ static int print_bin_fmt(struct trace_iterator *iter)
 	entry = iter->ent;
 
 	if (entry->type == TRACE_CONT)
-		return 1;
+		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_FIELD_RET(s, entry->pid);
 	SEQ_PUT_FIELD_RET(s, iter->cpu);
@@ -1686,13 +1686,18 @@ static int trace_empty(struct trace_iterator *iter)
 		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
 			return 0;
 	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_line(struct trace_iterator *iter)
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
-	if (iter->trace && iter->trace->print_line)
-		return iter->trace->print_line(iter);
+	enum print_line_t ret;
+
+	if (iter->trace && iter->trace->print_line) {
+		ret = iter->trace->print_line(iter);
+		if (ret != TRACE_TYPE_UNHANDLED)
+			return ret;
+	}
 
 	if (trace_flags & TRACE_ITER_BIN)
 		return print_bin_fmt(iter);
@@ -2525,11 +2530,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
-		int ret;
+		enum print_line_t ret;
 		int len = iter->seq.len;
 
 		ret = print_trace_line(iter);
-		if (!ret) {
+		if (ret == TRACE_TYPE_PARTIAL_LINE) {
 			/* don't print partial lines */
 			iter->seq.len = len;
 			break;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e541a6b7e312..a921ba5d292d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -177,6 +177,14 @@ struct trace_array {
 	struct trace_array_cpu	*data[NR_CPUS];
 };
 
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2	/* Relay to other output functions */
+};
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -197,7 +205,7 @@ struct tracer {
 	int			(*selftest)(struct tracer *trace,
 					    struct trace_array *tr);
 #endif
-	int			(*print_line)(struct trace_iterator *iter);
+	enum print_line_t	(*print_line)(struct trace_iterator *iter);
 	struct tracer		*next;
 	int			print_max;
 };
-- 
cgit v1.2.3-55-g7522


From 9ff4b9744c187cae58c3774361ea090addbc4130 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen
Date: Mon, 29 Sep 2008 20:23:48 +0200
Subject: tracing/ftrace: fix pipe breaking

This patch fixes a bug which break the pipe when the seq is empty.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b38a4bb40548..6a1c76bb56ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2439,7 +2439,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (sret != -EBUSY)
 		return sret;
-	sret = 0;
 
 	trace_seq_reset(&iter->seq);
 
@@ -2450,6 +2449,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
+waitagain:
+	sret = 0;
 	while (trace_empty(iter)) {
 
 		if ((filp->f_flags & O_NONBLOCK)) {
@@ -2556,8 +2557,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (iter->seq.readpos >= iter->seq.len)
 		trace_seq_reset(&iter->seq);
+
+	/*
+	 * If there was nothing to send to user, inspite of consuming trace
+	 * entries, go back to wait for more entries.
+	 */
 	if (sret == -EBUSY)
-		sret = 0;
+		goto waitagain;
 
 out:
 	mutex_unlock(&trace_types_lock);
-- 
cgit v1.2.3-55-g7522


From 07f4e4f790895d55f46aaf89e7437da4a585989c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 29 Sep 2008 20:27:42 +0200
Subject: tracing/ftrace: adapt mmiotrace to the new type of print_line

Adapt mmiotrace to the new print_line type.
By default, it ignores (and consumes) types it doesn't support.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_mmiotrace.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 3df441ea2749..1a266aa08e1a 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -175,7 +175,7 @@ print_out:
 	return (ret == -EBUSY) ? 0 : ret;
 }
 
-static int mmio_print_rw(struct trace_iterator *iter)
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct trace_mmiotrace_rw *field =
@@ -215,11 +215,11 @@ static int mmio_print_rw(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int mmio_print_map(struct trace_iterator *iter)
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
@@ -227,7 +227,7 @@ static int mmio_print_map(struct trace_iterator *iter)
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
-	int ret = 1;
+	int ret;
 
 	switch (m->opcode) {
 	case MMIO_PROBE:
@@ -247,11 +247,11 @@ static int mmio_print_map(struct trace_iterator *iter)
 		break;
 	}
 	if (ret)
-		return 1;
-	return 0;
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int mmio_print_mark(struct trace_iterator *iter)
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct print_entry *print = (struct print_entry *)entry;
@@ -265,16 +265,15 @@ static int mmio_print_mark(struct trace_iterator *iter)
 	/* The trailing newline must be in the message. */
 	ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
 	if (!ret)
-		return 0;
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	if (entry->flags & TRACE_FLAG_CONT)
 		trace_seq_print_cont(s, iter);
 
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
-/* return 0 to abort printing without consuming current entry in pipe mode */
-static int mmio_print_line(struct trace_iterator *iter)
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
 {
 	switch (iter->ent->type) {
 	case TRACE_MMIO_RW:
@@ -284,7 +283,7 @@ static int mmio_print_line(struct trace_iterator *iter)
 	case TRACE_PRINT:
 		return mmio_print_mark(iter);
 	default:
-		return 1; /* ignore unknown entries */
+		return TRACE_TYPE_HANDLED; /* ignore unknown entries */
 	}
 }
 
-- 
cgit v1.2.3-55-g7522


From 9e9efffb7848fe53c334996819139b431b983ac2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 29 Sep 2008 20:31:58 +0200
Subject: tracing/ftrace: adapt the boot tracer to the new print_line type

This patch adapts the boot tracer to the new type of the
print_line callback.

It still relays entries it doesn't support to default output
functions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index fa8cca1be115..43bde20b95bd 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -45,22 +45,25 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 		stop_boot_trace(tr);
 }
 
-static int initcall_print_line(struct trace_iterator *iter)
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 {
-	int ret = 0;
+	int ret;
 	struct trace_entry *entry = iter->ent;
 	struct trace_boot *field = (struct trace_boot *)entry;
 	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
 
-	if (entry->type == TRACE_BOOT)
+	if (entry->type == TRACE_BOOT) {
 		ret = trace_seq_printf(s, "%pF called from %i "
 				       "returned %d after %lld msecs\n",
 				       it->func, it->caller, it->result,
 				       it->duration);
-	if (ret)
-		return 1;
-	return 0;
+		if (ret)
+			return TRACE_TYPE_HANDLED;
+		else
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+	return TRACE_TYPE_UNHANDLED;
 }
 
 struct tracer boot_tracer __read_mostly =
-- 
cgit v1.2.3-55-g7522


From 70255b5e3f1bd1a5af5b1e425ec2c4db7c735112 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 1 Oct 2008 00:29:52 -0400
Subject: ring_buffer: remove raw from local_irq_save

The raw_local_irq_save causes issues with lockdep. We don't need it
so replace them with local_irq_save.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cfa711374d9a..8e7392fd0db9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -847,7 +847,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
-	raw_local_irq_save(*flags);
+	local_irq_save(*flags);
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -909,7 +909,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 	rb_commit(cpu_buffer, event);
 
 	spin_unlock(&cpu_buffer->lock);
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -1583,13 +1583,13 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	raw_local_irq_save(flags);
+	local_irq_save(flags);
 	spin_lock(&cpu_buffer->lock);
 
 	rb_reset_cpu(cpu_buffer);
 
 	spin_unlock(&cpu_buffer->lock);
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 }
 
 /**
-- 
cgit v1.2.3-55-g7522


From d769041f865330034131525ee6a7f72eb4af2a24 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 1 Oct 2008 00:29:53 -0400
Subject: ring_buffer: implement new locking

The old "lock always" scheme had issues with lockdep, and was not very
efficient anyways.

This patch does a new design to be partially lockless on writes.
Writes will add new entries to the per cpu pages by simply disabling
interrupts. When a write needs to go to another page than it will
grab the lock.

A new "read page" has been added so that the reader can pull out a page
from the ring buffer to read without worrying about the writer writing over
it. This allows us to not take the lock for all reads. The lock is
now only taken when a read needs to go to a new page.

This is far from lockless, and interrupts still need to be disabled,
but it is a step towards a more lockless solution, and it also
solves a lot of the issues that were noticed by the first conversion
of ftrace to the ring buffers.

Note: the ring_buffer_{un}lock API has been removed.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |   3 -
 kernel/trace/ring_buffer.c  | 298 +++++++++++++++++++++++++-------------------
 kernel/trace/trace.c        | 113 +++++++++++------
 3 files changed, 247 insertions(+), 167 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c52375b8330d..536b0ca46a03 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -63,9 +63,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
 	return event->time_delta;
 }
 
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags);
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags);
-
 /*
  * size is in bytes for each per CPU buffer.
  */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8e7392fd0db9..9631abf2ae29 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -161,8 +161,10 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	unsigned long			head;	/* read from head */
 	unsigned long			tail;	/* write to tail */
+	unsigned long			reader;
 	struct buffer_page		*head_page;
 	struct buffer_page		*tail_page;
+	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -260,6 +262,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long addr;
 	int ret;
 
 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -272,9 +275,16 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		goto fail_free_buffer;
+	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
-		goto fail_free_buffer;
+		goto fail_free_reader;
 
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
@@ -283,6 +293,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	return cpu_buffer;
 
+ fail_free_reader:
+	free_buffer_page(cpu_buffer->reader_page);
+
  fail_free_buffer:
 	kfree(cpu_buffer);
 	return NULL;
@@ -293,6 +306,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
+	list_del_init(&cpu_buffer->reader_page->list);
+	free_buffer_page(cpu_buffer->reader_page);
+
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
 		free_buffer_page(page);
@@ -538,8 +554,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return cpu_buffer->head_page == cpu_buffer->tail_page &&
-		cpu_buffer->head == cpu_buffer->tail;
+	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
+		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
+		  cpu_buffer->head == cpu_buffer->tail)));
 }
 
 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -555,10 +573,10 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 }
 
 static inline struct ring_buffer_event *
-rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->head_page,
-			     cpu_buffer->head);
+	return rb_page_index(cpu_buffer->reader_page,
+			     cpu_buffer->reader);
 }
 
 static inline struct ring_buffer_event *
@@ -610,15 +628,32 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 	cpu_buffer->write_stamp = *ts;
 }
 
-static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
 	cpu_buffer->head = 0;
 }
 
-static void
-rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+	cpu_buffer->reader = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/*
+	 * The iterator could be on the reader page (it starts there).
+	 * But the head could have moved, since the reader was
+	 * found. Check for this case and assign the iterator
+	 * to the head page instead of next.
+	 */
+	if (iter->head_page == cpu_buffer->reader_page)
+		iter->head_page = cpu_buffer->head_page;
+	else
+		rb_inc_page(cpu_buffer, &iter->head_page);
+
 	iter->read_stamp = iter->head_page->time_stamp;
 	iter->head = 0;
 }
@@ -693,30 +728,39 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *head_page, *tail_page;
+	struct buffer_page *tail_page, *head_page, *reader_page;
 	unsigned long tail;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 
+	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	head_page = cpu_buffer->head_page;
 	tail = cpu_buffer->tail;
 
 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
 
+		spin_lock(&cpu_buffer->lock);
 		rb_inc_page(cpu_buffer, &next_page);
 
+		head_page = cpu_buffer->head_page;
+		reader_page = cpu_buffer->reader_page;
+
+		/* we grabbed the lock before incrementing */
+		WARN_ON(next_page == reader_page);
+
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				spin_unlock(&cpu_buffer->lock);
 				return NULL;
+			}
 
 			/* count overflows */
 			rb_update_overflow(cpu_buffer);
 
 			rb_inc_page(cpu_buffer, &head_page);
 			cpu_buffer->head_page = head_page;
-			rb_reset_read_page(cpu_buffer);
+			rb_reset_head_page(cpu_buffer);
 		}
 
 		if (tail != BUF_PAGE_SIZE) {
@@ -732,6 +776,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page = tail_page;
 		cpu_buffer->tail = tail;
 		rb_add_stamp(cpu_buffer, ts);
+		spin_unlock(&cpu_buffer->lock);
 	}
 
 	BUG_ON(tail + length > BUF_PAGE_SIZE);
@@ -802,7 +847,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 				return NULL;
 		}
 	} else {
+		spin_lock(&cpu_buffer->lock);
 		rb_add_stamp(cpu_buffer, &ts);
+		spin_unlock(&cpu_buffer->lock);
 		delta = 0;
 	}
 
@@ -851,13 +898,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
-		goto no_record;
+		goto out;
 
 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
@@ -865,13 +911,11 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
-		goto no_record;
+		goto out;
 
 	return event;
 
- no_record:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
+ out:
 	local_irq_restore(*flags);
 	return NULL;
 }
@@ -904,11 +948,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	cpu_buffer = buffer->buffers[cpu];
 
-	assert_spin_locked(&cpu_buffer->lock);
-
 	rb_commit(cpu_buffer, event);
 
-	spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
 	return 0;
@@ -945,10 +986,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -967,55 +1007,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
 	local_irq_restore(flags);
 
 	return ret;
 }
 
-/**
- * ring_buffer_lock - lock the ring buffer
- * @buffer: The ring buffer to lock
- * @flags: The place to store the interrupt flags
- *
- * This locks all the per CPU buffers.
- *
- * Must be unlocked by ring_buffer_unlock.
- */
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	local_irq_save(*flags);
-
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		spin_lock(&cpu_buffer->lock);
-	}
-}
-
-/**
- * ring_buffer_unlock - unlock a locked buffer
- * @buffer: The locked buffer to unlock
- * @flags: The interrupt flags received by ring_buffer_lock
- */
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
-		if (!cpu_isset(cpu, buffer->cpumask))
-			continue;
-		cpu_buffer = buffer->buffers[cpu];
-		spin_unlock(&cpu_buffer->lock);
-	}
-
-	local_irq_restore(flags);
-}
-
 /**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
@@ -1169,9 +1165,18 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
-	iter->head_page = cpu_buffer->head_page;
-	iter->head = cpu_buffer->head;
-	rb_reset_iter_read_page(iter);
+	/* Iterator usage is expected to have record disabled */
+	if (list_empty(&cpu_buffer->reader_page->list)) {
+		iter->head_page = cpu_buffer->head_page;
+		iter->head = cpu_buffer->head;
+	} else {
+		iter->head_page = cpu_buffer->reader_page;
+		iter->head = cpu_buffer->reader;
+	}
+	if (iter->head)
+		iter->read_stamp = cpu_buffer->read_stamp;
+	else
+		iter->read_stamp = iter->head_page->time_stamp;
 }
 
 /**
@@ -1250,43 +1255,84 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 	return;
 }
 
-static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct ring_buffer_event *event;
-	unsigned length;
+	struct buffer_page *reader = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+	reader = cpu_buffer->reader_page;
+
+	/* If there's more to read, return this page */
+	if (cpu_buffer->reader < reader->size)
+		goto out;
+
+	/* Never should we have an index greater than the size */
+	WARN_ON(cpu_buffer->reader > reader->size);
+
+	/* check if we caught up to the tail */
+	reader = NULL;
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		goto out;
 
 	/*
-	 * Check if we are at the end of the buffer.
+	 * Splice the empty reader page into the list around the head.
+	 * Reset the reader page to size zero.
 	 */
-	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
-		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		return;
-	}
 
-	event = rb_head_event(cpu_buffer);
+	reader = cpu_buffer->head_page;
+	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.prev = reader->list.prev;
+	cpu_buffer->reader_page->size = 0;
 
-	if (event->type == RINGBUF_TYPE_DATA)
-		cpu_buffer->entries--;
-
-	length = rb_event_length(event);
+	/* Make the reader page now replace the head */
+	reader->list.prev->next = &cpu_buffer->reader_page->list;
+	reader->list.next->prev = &cpu_buffer->reader_page->list;
 
 	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
+	 * If the tail is on the reader, then we must set the head
+	 * to the inserted page, otherwise we set it one before.
 	 */
-	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
-	       (cpu_buffer->head + length > cpu_buffer->tail));
+	cpu_buffer->head_page = cpu_buffer->reader_page;
 
-	rb_update_read_stamp(cpu_buffer, event);
+	if (cpu_buffer->tail_page != reader)
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+	/* Finally update the reader page to the new head */
+	cpu_buffer->reader_page = reader;
+	rb_reset_reader_page(cpu_buffer);
+
+	goto again;
+
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+	unsigned length;
+
+	reader = rb_get_reader_page(cpu_buffer);
 
-	cpu_buffer->head += length;
+	/* This function should not be called when buffer is empty */
+	BUG_ON(!reader);
 
-	/* check for end of page */
-	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
-	    (cpu_buffer->head_page != cpu_buffer->tail_page))
-		rb_advance_head(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	length = rb_event_length(event);
+	cpu_buffer->reader += length;
 }
 
 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1304,8 +1350,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 */
 	if (iter->head >= iter->head_page->size) {
 		BUG_ON(iter->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		return;
 	}
 
@@ -1344,6 +1389,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	struct buffer_page *reader;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1351,25 +1397,26 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
-	if (rb_per_cpu_empty(cpu_buffer))
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
 		return NULL;
 
-	event = rb_head_event(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		goto again;
+		WARN_ON(1);
+		rb_advance_reader(cpu_buffer);
+		return NULL;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		/* Internal data, OK to advance */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
 		/* FIXME: not implemented */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
@@ -1415,8 +1462,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
@@ -1465,7 +1511,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		return NULL;
 
 	cpu_buffer = buffer->buffers[cpu];
-	rb_advance_head(cpu_buffer);
+	rb_advance_reader(cpu_buffer);
 
 	return event;
 }
@@ -1487,6 +1533,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
+	unsigned long flags;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1502,11 +1549,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();
 
-	spin_lock(&cpu_buffer->lock);
-	iter->head = cpu_buffer->head;
-	iter->head_page = cpu_buffer->head_page;
-	rb_reset_iter_read_page(iter);
-	spin_unlock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	ring_buffer_iter_reset(iter);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 
 	return iter;
 }
@@ -1562,10 +1607,14 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->head_page->size = 0;
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->tail_page->size = 0;
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
+	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;
 
-	cpu_buffer->head = cpu_buffer->tail = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 }
@@ -1583,13 +1632,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	local_irq_save(flags);
-	spin_lock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
 	rb_reset_cpu(cpu_buffer);
 
-	spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 }
 
 /**
@@ -1598,15 +1645,10 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
  */
 void ring_buffer_reset(struct ring_buffer *buffer)
 {
-	unsigned long flags;
 	int cpu;
 
-	ring_buffer_lock(buffer, &flags);
-
 	for_each_buffer_cpu(buffer, cpu)
-		rb_reset_cpu(buffer->buffers[cpu]);
-
-	ring_buffer_unlock(buffer, flags);
+		ring_buffer_reset_cpu(buffer, cpu);
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+	preempt_disable();
+	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+	preempt_enable();
+}
+
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;
 
+	ftrace_disable_cpu();
 	ring_buffer_reset(tr->buffer);
+	ftrace_enable_cpu();
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
 
+	ftrace_disable_cpu();
+
 	ring_buffer_reset(max_tr.buffer);
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
+	ftrace_enable_cpu();
+
 	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)
 
 void tracing_reset(struct trace_array *tr, int cpu)
 {
+	ftrace_disable_cpu();
 	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
+	/* If we are reading the ring buffer, don't trace */
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
 	if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {
 
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
 	iter->idx++;
-	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (iter->buffer_iter[iter->cpu])
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+	ftrace_enable_cpu();
 }
 
 static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	event = ring_buffer_iter_peek(buf_iter, ts);
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
+	if (buf_iter)
+		event = ring_buffer_iter_peek(buf_iter, ts);
+	else
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+	ftrace_enable_cpu();
+
 	return event ? ring_buffer_event_data(event) : NULL;
 }
+
 static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 
 static void trace_consume(struct trace_iterator *iter)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->cpu = 0;
 		iter->idx = -1;
 
+		ftrace_disable_cpu();
+
 		for_each_tracing_cpu(cpu) {
 			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
+		ftrace_enable_cpu();
+
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		cont = (struct trace_field_cont *)ent;
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+		ftrace_disable_cpu();
+
+		if (iter->buffer_iter[iter->cpu])
+			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		else
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+		ftrace_enable_cpu();
+
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
 
@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
-			return 0;
+		if (iter->buffer_iter[cpu]) {
+			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+				return 0;
+		} else {
+			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+				return 0;
+		}
 	}
+
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->pos = -1;
 
 	for_each_tracing_cpu(cpu) {
+
 		iter->buffer_iter[cpu] =
 			ring_buffer_read_start(iter->tr->buffer, cpu);
+
 		if (!iter->buffer_iter[cpu])
 			goto fail_buffer;
 	}
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
-	int cpu;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;
 
-	for_each_tracing_cpu(cpu) {
-		iter->buffer_iter[cpu] =
-			ring_buffer_read_start(iter->tr->buffer, cpu);
-		if (!iter->buffer_iter[cpu])
-			goto fail_buffer;
-	}
-
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
-
- fail_buffer:
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
-	mutex_unlock(&trace_types_lock);
-
-	return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	int cpu;
 
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
@@ -2528,7 +2571,6 @@ waitagain:
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
 			break;
 	}
 
-	ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
 #endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
 	static struct trace_iterator iter;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags, irq_flags;
-	int cnt = 0;
+	unsigned long flags;
+	int cnt = 0, cpu;
 
 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
 	/* No turning back! */
 	ftrace_kill_atomic();
 
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&global_trace.data[cpu]->disabled);
+	}
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)
 
 	cpus_clear(mask);
 
-	ring_buffer_lock(iter.tr->buffer, &irq_flags);
-
 	while (!trace_empty(&iter)) {
 
 		if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
-	ring_buffer_unlock(iter.tr->buffer, irq_flags);
-
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }
-- 
cgit v1.2.3-55-g7522


From 797d3712a9dd75c720558612be05f42c031a7bb5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 30 Sep 2008 18:13:45 +0200
Subject: tracing/ftrace: adapt mmiotrace to the new type of print_line, fix

Correct the value's type of trace_empty function

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b542f8837801..c1634068adfa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1750,7 +1750,7 @@ static int trace_empty(struct trace_iterator *iter)
 		}
 	}
 
-	return TRACE_TYPE_HANDLED;
+	return 1;
 }
 
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
-- 
cgit v1.2.3-55-g7522


From 7104f300c5a69b46dda00d898034dd05c9f21739 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 1 Oct 2008 10:52:51 -0400
Subject: ftrace: type cast filter+verifier

The mmiotrace map had a bug that would typecast the entry from
the trace to the wrong type. That is a known danger of C typecasts,
there's absolutely zero checking done on them.

Help that problem a bit by using a GCC extension to implement a
type filter that restricts the types that a trace record can be
cast into, and by adding a dynamic check (in debug mode) to verify
the type of the entry.

This patch adds a macro to assign all entries of ftrace using the type
of the variable and checking the entry id. The typecasts are now done
in the macro for only those types that it knows about, which should
be all the types that are allowed to be read from the tracer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c           | 85 +++++++++++++++++++++++++++++-------------
 kernel/trace/trace.h           | 42 +++++++++++++++++++++
 kernel/trace/trace_mmiotrace.c | 14 +++++--
 3 files changed, 112 insertions(+), 29 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1634068adfa..948f7d821c62 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1350,7 +1350,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
@@ -1363,8 +1365,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		T = field->next_state < sizeof(state_to_char) ?
 			state_to_char[field->next_state] : 'X';
@@ -1384,7 +1387,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_SPECIAL: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1393,7 +1398,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_STACK: {
-		struct stack_entry *field = (struct stack_entry *)entry;
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
 
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i)
@@ -1404,7 +1411,9 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->buf);
@@ -1454,7 +1463,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = seq_print_ip_sym(s, field->ip, sym_flags);
 		if (!ret)
@@ -1480,8 +1491,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1501,7 +1513,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1512,7 +1526,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_STACK: {
-		struct stack_entry *field = (struct stack_entry *)entry;
+		struct stack_entry *field;
+
+		trace_assign_type(field, entry);
 
 		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 			if (i) {
@@ -1531,7 +1547,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_printf(s, ": %s", field->buf);
@@ -1562,7 +1580,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "%x %x\n",
 					field->ip,
@@ -1573,8 +1593,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1596,7 +1617,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
 				 field->arg1,
@@ -1607,7 +1630,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_PRINT: {
-		struct print_entry *field = (struct print_entry *)entry;
+		struct print_entry *field;
+
+		trace_assign_type(field, entry);
 
 		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
 		if (entry->flags & TRACE_FLAG_CONT)
@@ -1648,7 +1673,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
 		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
@@ -1656,8 +1683,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_CTX:
 	case TRACE_WAKE: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		S = field->prev_state < sizeof(state_to_char) ?
 			state_to_char[field->prev_state] : 'X';
@@ -1676,7 +1704,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
 		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
@@ -1705,15 +1735,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_FN: {
-		struct ftrace_entry *field = (struct ftrace_entry *)entry;
+		struct ftrace_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->ip);
 		SEQ_PUT_FIELD_RET(s, field->parent_ip);
 		break;
 	}
 	case TRACE_CTX: {
-		struct ctx_switch_entry *field =
-			(struct ctx_switch_entry *)entry;
+		struct ctx_switch_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->prev_pid);
 		SEQ_PUT_FIELD_RET(s, field->prev_prio);
@@ -1725,7 +1758,9 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 	}
 	case TRACE_SPECIAL:
 	case TRACE_STACK: {
-		struct special_entry *field = (struct special_entry *)entry;
+		struct special_entry *field;
+
+		trace_assign_type(field, entry);
 
 		SEQ_PUT_FIELD_RET(s, field->arg1);
 		SEQ_PUT_FIELD_RET(s, field->arg2);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a921ba5d292d..f02042d0d828 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -177,6 +177,48 @@ struct trace_array {
 	struct trace_array_cpu	*data[NR_CPUS];
 };
 
+#define FTRACE_CMP_TYPE(var, type) \
+	__builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id)		\
+	if (FTRACE_CMP_TYPE(var, etype)) {		\
+		var = (typeof(var))(entry);		\
+		WARN_ON(id && (entry)->type != id);	\
+		break;					\
+	}
+
+/* Will cause compile errors if type is not found. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type is a verifier that the entry type is
+ * the same as the type being assigned. To add new types simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ *  Where "type" is the trace type that includes the trace_entry
+ *  as the "ent" item. And "id" is the trace identifier that is
+ *  used in the trace_type enum.
+ *
+ *  If the type can have more than one id, then use zero.
+ */
+#define trace_assign_type(var, ent)					\
+	do {								\
+		IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);	\
+		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
+		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct special_entry, 0);		\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
+			  TRACE_MMIO_RW);				\
+		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
+			  TRACE_MMIO_MAP);				\
+		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);	\
+		__ftrace_bad_type();					\
+	} while (0)
 
 /* Return values for print_line callback */
 enum print_line_t {
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 1a266aa08e1a..0e819f47bb7a 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -178,15 +178,17 @@ print_out:
 static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct trace_mmiotrace_rw *field =
-		(struct trace_mmiotrace_rw *)entry;
-	struct mmiotrace_rw *rw	= &field->rw;
+	struct trace_mmiotrace_rw *field;
+	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
+	trace_assign_type(field, entry);
+	rw = &field->rw;
+
 	switch (rw->opcode) {
 	case MMIO_READ:
 		ret = trace_seq_printf(s,
@@ -222,13 +224,17 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
-	struct mmiotrace_map *m	= (struct mmiotrace_map *)entry;
+	struct trace_mmiotrace_map *field;
+	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
 	unsigned long usec_rem	= do_div(t, 1000000ULL);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
+	trace_assign_type(field, entry);
+	m = &field->map;
+
 	switch (m->opcode) {
 	case MMIO_PROBE:
 		ret = trace_seq_printf(s,
-- 
cgit v1.2.3-55-g7522


From e4c2ce82ca2710e17cb4df8eb2b249fa2eb5af30 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 1 Oct 2008 11:14:54 -0400
Subject: ring_buffer: allocate buffer page pointer

The current method of overlaying the page frame as the buffer page pointer
can be very dangerous and limits our ability to do other things with
a page from the buffer, like send it off to disk.

This patch allocates the buffer_page instead of overlaying the page's
page frame. The use of the buffer_page has hardly changed due to this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 54 +++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9631abf2ae29..98145718988d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -115,16 +115,10 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
  * Thanks to Peter Zijlstra for suggesting this idea.
  */
 struct buffer_page {
-	union {
-		struct {
-			unsigned long	 flags;		/* mandatory */
-			atomic_t	 _count;	/* mandatory */
-			u64		 time_stamp;	/* page time stamp */
-			unsigned	 size;		/* size of page data */
-			struct list_head list;		/* list of free pages */
-		};
-		struct page page;
-	};
+	u64		 time_stamp;	/* page time stamp */
+	unsigned	 size;		/* size of page data */
+	struct list_head list;		/* list of free pages */
+	void *page;			/* Actual data page */
 };
 
 /*
@@ -133,9 +127,9 @@ struct buffer_page {
  */
 static inline void free_buffer_page(struct buffer_page *bpage)
 {
-	reset_page_mapcount(&bpage->page);
-	bpage->page.mapping = NULL;
-	__free_page(&bpage->page);
+	if (bpage->page)
+		__free_page(bpage->page);
+	kfree(bpage);
 }
 
 /*
@@ -237,11 +231,16 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	unsigned i;
 
 	for (i = 0; i < nr_pages; i++) {
+		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+				    GFP_KERNEL, cpu_to_node(cpu));
+		if (!page)
+			goto free_pages;
+		list_add(&page->list, &pages);
+
 		addr = __get_free_page(GFP_KERNEL);
 		if (!addr)
 			goto free_pages;
-		page = (struct buffer_page *)virt_to_page(addr);
-		list_add(&page->list, &pages);
+		page->page = (void *)addr;
 	}
 
 	list_splice(&pages, head);
@@ -262,6 +261,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *page;
 	unsigned long addr;
 	int ret;
 
@@ -275,10 +275,17 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
+	page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (!page)
+		goto fail_free_buffer;
+
+	cpu_buffer->reader_page = page;
 	addr = __get_free_page(GFP_KERNEL);
 	if (!addr)
-		goto fail_free_buffer;
-	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+		goto fail_free_reader;
+	page->page = (void *)addr;
+
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	cpu_buffer->reader_page->size = 0;
 
@@ -523,11 +530,16 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 	for_each_buffer_cpu(buffer, cpu) {
 		for (i = 0; i < new_pages; i++) {
+			page = kzalloc_node(ALIGN(sizeof(*page),
+						  cache_line_size()),
+					    GFP_KERNEL, cpu_to_node(cpu));
+			if (!page)
+				goto free_pages;
+			list_add(&page->list, &pages);
 			addr = __get_free_page(GFP_KERNEL);
 			if (!addr)
 				goto free_pages;
-			page = (struct buffer_page *)virt_to_page(addr);
-			list_add(&page->list, &pages);
+			page->page = (void *)addr;
 		}
 	}
 
@@ -567,9 +579,7 @@ static inline int rb_null_event(struct ring_buffer_event *event)
 
 static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 {
-	void *addr = page_address(&page->page);
-
-	return addr + index;
+	return page->page + index;
 }
 
 static inline struct ring_buffer_event *
-- 
cgit v1.2.3-55-g7522


From 38697053fa006411224a1790e2adb8216440ab0f Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 1 Oct 2008 13:14:09 -0400
Subject: ftrace: preempt disable over interrupt disable

With the new ring buffer infrastructure in ftrace, I'm trying to make
ftrace a little more light weight.

This patch converts a lot of the local_irq_save/restore into
preempt_disable/enable.  The original preempt count in a lot of cases
has to be sent in as a parameter so that it can be recorded correctly.
Some places were recording it incorrectly before anyway.

This is also laying the ground work to make ftrace a little bit
more reentrant, and remove all locking. The function tracers must
still protect from reentrancy.

Note: All the function tracers must be careful when using preempt_disable.
  It must do the following:

  resched = need_resched();
  preempt_disable_notrace();
  [...]
  if (resched)
	preempt_enable_no_resched_notrace();
  else
	preempt_enable_notrace();

The reason is that if this function traces schedule() itself, the
preempt_enable_notrace() will cause a schedule, which will lead
us into a recursive failure.

If we needed to reschedule before calling preempt_disable, we
should have already scheduled. Since we did not, this is most
likely that we should not and are probably inside a schedule
function.

If resched was not set, we still need to catch the need resched
flag being set when preemption was off and the if case at the
end will catch that for us.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 123 +++++++++++++++++++-------------------
 kernel/trace/trace.h              |  13 ++--
 kernel/trace/trace_boot.c         |   2 +-
 kernel/trace/trace_irqsoff.c      |  13 ++--
 kernel/trace/trace_mmiotrace.c    |   4 +-
 kernel/trace/trace_sched_switch.c |   9 ++-
 kernel/trace/trace_sched_wakeup.c |  13 +++-
 7 files changed, 97 insertions(+), 80 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948f7d821c62..1cd2e8143bb4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -652,12 +652,10 @@ void tracing_record_cmdline(struct task_struct *tsk)
 }
 
 void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+			     int pc)
 {
 	struct task_struct *tsk = current;
-	unsigned long pc;
-
-	pc = preempt_count();
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
@@ -670,7 +668,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
-	       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+	       int pc)
 {
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
@@ -685,7 +684,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_FN;
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
@@ -694,16 +693,17 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+       int pc)
 {
 	if (likely(!atomic_read(&data->disabled)))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-void __trace_stack(struct trace_array *tr,
-		   struct trace_array_cpu *data,
-		   unsigned long flags,
-		   int skip)
+static void ftrace_trace_stack(struct trace_array *tr,
+			       struct trace_array_cpu *data,
+			       unsigned long flags,
+			       int skip, int pc)
 {
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
@@ -718,7 +718,7 @@ void __trace_stack(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type		= TRACE_STACK;
 
 	memset(&entry->caller, 0, sizeof(entry->caller));
@@ -732,9 +732,18 @@ void __trace_stack(struct trace_array *tr,
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
-void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+void __trace_stack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags,
+		   int skip)
+{
+	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+}
+
+static void
+ftrace_trace_special(void *__tr, void *__data,
+		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
+		     int pc)
 {
 	struct ring_buffer_event *event;
 	struct trace_array_cpu *data = __data;
@@ -747,23 +756,30 @@ __trace_special(void *__tr, void *__data,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, pc);
 	entry->ent.type			= TRACE_SPECIAL;
 	entry->arg1			= arg1;
 	entry->arg2			= arg2;
 	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, irq_flags, 4);
+	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
 
 	trace_wake_up();
 }
 
+void
+__trace_special(void *__tr, void *__data,
+		unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+	ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
 void
 tracing_sched_switch_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *prev,
 			   struct task_struct *next,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
@@ -774,7 +790,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_CTX;
 	entry->prev_pid			= prev->pid;
 	entry->prev_prio		= prev->prio;
@@ -784,7 +800,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, flags, 5);
+	ftrace_trace_stack(tr, data, flags, 5, pc);
 }
 
 void
@@ -792,7 +808,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct trace_array_cpu *data,
 			   struct task_struct *wakee,
 			   struct task_struct *curr,
-			   unsigned long flags)
+			   unsigned long flags, int pc)
 {
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
@@ -803,7 +819,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_WAKE;
 	entry->prev_pid			= curr->pid;
 	entry->prev_prio		= curr->prio;
@@ -813,7 +829,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-	__trace_stack(tr, data, flags, 6);
+	ftrace_trace_stack(tr, data, flags, 6, pc);
 
 	trace_wake_up();
 }
@@ -823,23 +839,24 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	if (tracing_disabled || !tr->ctrl)
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		__trace_special(tr, data, arg1, arg2, arg3);
+		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 }
 
 #ifdef CONFIG_FTRACE
@@ -850,7 +867,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
-	int cpu;
+	int cpu, resched;
+	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
@@ -858,16 +876,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	if (skip_trace(ip))
 		return;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	resched = need_resched();
+	preempt_disable_notrace();
+	local_save_flags(flags);
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, pc);
 
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_ops __read_mostly =
@@ -2508,9 +2532,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-#ifdef CONFIG_FTRACE
-	int ftrace_save;
-#endif
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -2593,20 +2614,6 @@ waitagain:
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
-	/*
-	 * We need to stop all tracing on all CPUS to read the
-	 * the next buffer. This is a bit expensive, but is
-	 * not done often. We fill all what we can read,
-	 * and then release the locks again.
-	 */
-
-	local_irq_disable();
-#ifdef CONFIG_FTRACE
-	ftrace_save = ftrace_enabled;
-	ftrace_enabled = 0;
-#endif
-	smp_wmb();
-
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -2624,11 +2631,6 @@ waitagain:
 			break;
 	}
 
-#ifdef CONFIG_FTRACE
-	ftrace_enabled = ftrace_save;
-#endif
-	local_irq_enable();
-
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 	if (iter->seq.readpos >= iter->seq.len)
@@ -2960,12 +2962,13 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct print_entry *entry;
 	unsigned long flags, irq_flags;
 	long disabled;
-	int cpu, len = 0, size;
+	int cpu, len = 0, size, pc;
 
 	if (!tr->ctrl || tracing_disabled)
 		return 0;
 
-	local_irq_save(flags);
+	pc = preempt_count();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
@@ -2973,7 +2976,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	if (unlikely(disabled != 1))
 		goto out;
 
-	spin_lock(&trace_buf_lock);
+	spin_lock_irqsave(&trace_buf_lock, flags);
 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	len = min(len, TRACE_BUF_SIZE-1);
@@ -2984,7 +2987,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	if (!event)
 		goto out_unlock;
 	entry = ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, flags);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
 	entry->ent.type			= TRACE_PRINT;
 	entry->ip			= ip;
 
@@ -2993,11 +2996,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
  out_unlock:
-	spin_unlock(&trace_buf_lock);
+	spin_unlock_irqrestore(&trace_buf_lock, flags);
 
  out:
 	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 
 	return len;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f02042d0d828..f1f99572cde7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -288,35 +288,36 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 void tracing_generic_entry_update(struct trace_entry *entry,
-						unsigned long flags);
+				  unsigned long flags,
+				  int pc);
 
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
 			    unsigned long parent_ip,
-			    unsigned long flags);
+			    unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *prev,
 				struct task_struct *next,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct task_struct *wakee,
 				struct task_struct *cur,
-				unsigned long flags);
+				unsigned long flags, int pc);
 void trace_special(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long arg1,
 		   unsigned long arg2,
-		   unsigned long arg3);
+		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
 		    struct trace_array_cpu *data,
 		    unsigned long ip,
 		    unsigned long parent_ip,
-		    unsigned long flags);
+		    unsigned long flags, int pc);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 43bde20b95bd..f2dac6f1cf06 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -95,7 +95,7 @@ void trace_boot(struct boot_trace *it)
 	if (!event)
 		goto out;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
 	entry->ent.type = TRACE_BOOT;
 	entry->initcall = *it;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 37ad49407f27..f925dbbff2a6 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		trace_function(tr, data, ip, parent_ip, flags);
+		trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	atomic_dec(&data->disabled);
 }
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
 	unsigned long latency, t0, t1;
 	cycle_t T0, T1, delta;
 	unsigned long flags;
+	int pc;
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -144,13 +145,15 @@ check_critical_timing(struct trace_array *tr,
 	if (!report_latency(delta))
 		goto out;
 
+	pc = preempt_count();
+
 	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
 	if (!report_latency(delta))
 		goto out_unlock;
 
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 
 	latency = nsecs_to_usecs(delta);
 
@@ -174,7 +177,7 @@ out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
 	tracing_reset(tr, cpu);
-	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+	trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
 static inline void
@@ -207,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	local_save_flags(flags);
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
 	per_cpu(tracing_cpu, cpu) = 1;
 
@@ -241,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 	atomic_inc(&data->disabled);
 
 	local_save_flags(flags);
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 	check_critical_timing(tr, data, parent_ip ? : ip, cpu);
 	data->critical_start = 0;
 	atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0e819f47bb7a..f28484618ff0 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -324,7 +324,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_RW;
 	entry->rw			= *rw;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
@@ -352,7 +352,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 	if (!event)
 		return;
 	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_MAP;
 	entry->map			= *map;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e0b06db0f7af..c7fa08a5b7f4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -26,6 +26,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	if (!atomic_read(&sched_ref))
 		return;
@@ -36,13 +37,14 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	if (!tracer_enabled)
 		return;
 
+	pc = preempt_count();
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
 	disabled = atomic_inc_return(&data->disabled);
 
 	if (likely(disabled == 1))
-		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
+		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
@@ -54,11 +56,12 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	long disabled;
-	int cpu;
+	int cpu, pc;
 
 	if (!likely(tracer_enabled))
 		return;
 
+	pc = preempt_count();
 	tracing_record_cmdline(current);
 
 	local_irq_save(flags);
@@ -68,7 +71,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 
 	if (likely(disabled == 1))
 		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
-			flags);
+					   flags, pc);
 
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 01e75e0639b7..fe4a252c2363 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	long disabled;
 	int resched;
 	int cpu;
+	int pc;
 
 	if (likely(!wakeup_task))
 		return;
 
+	pc = preempt_count();
 	resched = need_resched();
 	preempt_disable_notrace();
 
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	if (task_cpu(wakeup_task) != cpu)
 		goto unlock;
 
-	trace_function(tr, data, ip, parent_ip, flags);
+	trace_function(tr, data, ip, parent_ip, flags, pc);
 
  unlock:
 	__raw_spin_unlock(&wakeup_lock);
@@ -121,6 +123,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	unsigned long flags;
 	long disabled;
 	int cpu;
+	int pc;
 
 	tracing_record_cmdline(prev);
 
@@ -139,6 +142,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (next != wakeup_task)
 		return;
 
+	pc = preempt_count();
+
 	/* The task we are waiting for is waking up */
 	data = wakeup_trace->data[wakeup_cpu];
 
@@ -155,7 +160,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 	/*
 	 * usecs conversion is slow so we try to delay the conversion
@@ -220,6 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	long disabled;
+	int pc;
 
 	if (likely(!tracer_enabled))
 		return;
@@ -232,6 +238,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 			p->prio >= current->prio)
 		return;
 
+	pc = preempt_count();
 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
 	if (unlikely(disabled != 1))
 		goto out;
@@ -256,7 +263,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p)
 
 	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
 	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
-		       CALLER_ADDR1, CALLER_ADDR2, flags);
+		       CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
 	__raw_spin_unlock(&wakeup_lock);
-- 
cgit v1.2.3-55-g7522


From 77ae11f63befb7fc41ec256f1fcb72ca7e4160d5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 2 Oct 2008 11:04:14 +0200
Subject: ring-buffer: fix build error

fix:

 kernel/trace/ring_buffer.c: In function ‘rb_allocate_pages’:
 kernel/trace/ring_buffer.c:235: error: ‘cpu’ undeclared (first use in this function)
 kernel/trace/ring_buffer.c:235: error: (Each undeclared identifier is reported only once
 kernel/trace/ring_buffer.c:235: error: for each function it appears in.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 98145718988d..54a30986493a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -232,7 +232,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
-				    GFP_KERNEL, cpu_to_node(cpu));
+				    GFP_KERNEL, cpu_to_node(i));
 		if (!page)
 			goto free_pages;
 		list_add(&page->list, &pages);
-- 
cgit v1.2.3-55-g7522


From cb5ab74204a6e2579d1119bf1348eb806526b12b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 2 Oct 2008 12:59:20 +0200
Subject: tracing/fastboot: change the printing of boot tracer according to
 bootgraph.pl

Change the boot tracer printing to make it parsable for
the scripts/bootgraph.pl script.

We have now to output two lines for each initcall, according to the
printk in do_one_initcall() in init/main.c
We need now the call's time and the return's time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  2 ++
 init/main.c               | 20 +++++++++-----------
 kernel/trace/trace_boot.c | 22 +++++++++++++++-------
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 91954eb6460f..4455490d91bd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -216,6 +216,8 @@ struct boot_trace {
 	initcall_t		func;
 	int			result;
 	unsigned long long	duration;
+	ktime_t			calltime;
+	ktime_t			rettime;
 };
 
 #ifdef CONFIG_BOOT_TRACER
diff --git a/init/main.c b/init/main.c
index 1e39a1eab190..61eb66159391 100644
--- a/init/main.c
+++ b/init/main.c
@@ -706,34 +706,32 @@ __setup("initcall_debug", initcall_debug_setup);
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	ktime_t t0, t1, delta;
+	ktime_t delta;
 	char msgbuf[64];
-	int result;
 	struct boot_trace it;
 
 	if (initcall_debug) {
 		it.caller = task_pid_nr(current);
 		it.func = fn;
 		printk("calling  %pF @ %i\n", fn, it.caller);
-		t0 = ktime_get();
+		it.calltime = ktime_get();
 	}
 
-	result = fn();
+	it.result = fn();
 
 	if (initcall_debug) {
-		t1 = ktime_get();
-		delta = ktime_sub(t1, t0);
-		it.result = result;
+		it.rettime = ktime_get();
+		delta = ktime_sub(it.rettime, it.calltime);
 		it.duration = (unsigned long long) delta.tv64 >> 20;
 		printk("initcall %pF returned %d after %Ld msecs\n", fn,
-			result, it.duration);
+			it.result, it.duration);
 		trace_boot(&it);
 	}
 
 	msgbuf[0] = 0;
 
-	if (result && result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", result);
+	if (it.result && it.result != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", it.result);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -747,7 +745,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return result;
+	return it.result;
 }
 
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index f2dac6f1cf06..7c15f3e68ba3 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -52,16 +52,24 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct trace_boot *field = (struct trace_boot *)entry;
 	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
+	struct timespec calltime = ktime_to_timespec(it->calltime);
+	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "%pF called from %i "
-				       "returned %d after %lld msecs\n",
-				       it->func, it->caller, it->result,
-				       it->duration);
-		if (ret)
-			return TRACE_TYPE_HANDLED;
-		else
+		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %pF @ %i\n",
+					  calltime.tv_sec,
+					  calltime.tv_nsec,
+					  it->func, it->caller);
+		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %pF "
+					  "returned %d after %lld msecs\n",
+					  rettime.tv_sec,
+					  rettime.tv_nsec,
+					  it->func, it->result, it->duration);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_HANDLED;
 	}
 	return TRACE_TYPE_UNHANDLED;
 }
-- 
cgit v1.2.3-55-g7522


From 5601020feb0c3010e9e3e0131e9697ac6a06777b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 2 Oct 2008 13:26:05 +0200
Subject: tracing/fastboot: get the initcall name before it disappears

After some initcall traces, some initcall names may be inconsistent.
That's because these functions will disappear from the .init section
and also their name from the symbols table.

So we have to copy the name of the function in a buffer large enough
during the trace appending. It is not costly for the ring_buffer because
the number of initcall entries is commonly not really large.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  7 ++++---
 init/main.c               |  3 +--
 kernel/trace/trace_boot.c | 14 ++++++++++----
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4455490d91bd..e672e51c40a9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/kallsyms.h>
 
 extern int ftrace_enabled;
 extern int
@@ -213,7 +214,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 
 struct boot_trace {
 	pid_t			caller;
-	initcall_t		func;
+	char 			func[KSYM_NAME_LEN];
 	int			result;
 	unsigned long long	duration;
 	ktime_t			calltime;
@@ -221,10 +222,10 @@ struct boot_trace {
 };
 
 #ifdef CONFIG_BOOT_TRACER
-extern void trace_boot(struct boot_trace *it);
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
 extern void start_boot_trace(void);
 #else
-static inline void trace_boot(struct boot_trace *it) { }
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
 static inline void start_boot_trace(void) { }
 #endif
 
diff --git a/init/main.c b/init/main.c
index 61eb66159391..8e96a0ef17f4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -712,7 +712,6 @@ int do_one_initcall(initcall_t fn)
 
 	if (initcall_debug) {
 		it.caller = task_pid_nr(current);
-		it.func = fn;
 		printk("calling  %pF @ %i\n", fn, it.caller);
 		it.calltime = ktime_get();
 	}
@@ -725,7 +724,7 @@ int do_one_initcall(initcall_t fn)
 		it.duration = (unsigned long long) delta.tv64 >> 20;
 		printk("initcall %pF returned %d after %Ld msecs\n", fn,
 			it.result, it.duration);
-		trace_boot(&it);
+		trace_boot(&it, fn);
 	}
 
 	msgbuf[0] = 0;
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7c15f3e68ba3..b9dc2c0093ab 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
+#include <linux/kallsyms.h>
 
 #include "trace.h"
 
@@ -56,17 +57,19 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %pF @ %i\n",
+		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %s @ %i\n",
 					  calltime.tv_sec,
 					  calltime.tv_nsec,
 					  it->func, it->caller);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
-		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %pF "
+
+		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %s "
 					  "returned %d after %lld msecs\n",
 					  rettime.tv_sec,
 					  rettime.tv_nsec,
 					  it->func, it->result, it->duration);
+
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_HANDLED;
@@ -83,8 +86,7 @@ struct tracer boot_tracer __read_mostly =
 	.print_line	= initcall_print_line,
 };
 
-
-void trace_boot(struct boot_trace *it)
+void trace_boot(struct boot_trace *it, initcall_t fn)
 {
 	struct ring_buffer_event *event;
 	struct trace_boot *entry;
@@ -95,6 +97,10 @@ void trace_boot(struct boot_trace *it)
 	if (!trace_boot_enabled)
 		return;
 
+	/* Get its name now since this function could
+	 * disappear because it is in the .init section.
+	 */
+	sprint_symbol(it->func, (unsigned long)fn);
 	preempt_disable();
 	data = tr->data[smp_processor_id()];
 
-- 
cgit v1.2.3-55-g7522


From 3e1932ad59726d794a865cc159c0593d54bf0cb6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 2 Oct 2008 17:45:47 +0200
Subject: tracing/fastboot: build fix

fix:

 In file included from kernel/sysctl.c:52:
 include/linux/ftrace.h:217: error: 'KSYM_NAME_LEN' undeclared here (not in a function)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e672e51c40a9..deded114dffd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-#ifdef CONFIG_FTRACE
-
 #include <linux/linkage.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kallsyms.h>
 
+#ifdef CONFIG_FTRACE
+
 extern int ftrace_enabled;
 extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-- 
cgit v1.2.3-55-g7522


From eb7fa935274bb233686fdf7a53f40c5d9ee76ed6 Mon Sep 17 00:00:00 2001
From: Steven Noonan
Date: Thu, 2 Oct 2008 12:00:07 -0700
Subject: ftrace: ktime.h not included in ftrace.h

Including <linux/ktime.h> eliminates the following error:

include/linux/ftrace.h:220: error: expected specifier-qualifier-list
before 'ktime_t'

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index deded114dffd..ed53265d1f63 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -3,6 +3,7 @@
 
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/ktime.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kallsyms.h>
-- 
cgit v1.2.3-55-g7522


From aa1e0e3bcf95ce684d005bedb16e5d4559455685 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 2 Oct 2008 19:18:09 -0400
Subject: ring_buffer: map to cpu not page

My original patch had a compile bug when NUMA was configured. I
referenced cpu when it should have been cpu_buffer->cpu.

Ingo quickly fixed this bug by replacing cpu with 'i' because that
was the loop counter. Unfortunately, the 'i' was the counter of
pages, not CPUs. This caused a crash when the number of pages allocated
for the buffers exceeded the number of pages, which would usually
be the case.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 54a30986493a..6b8dac02364f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -232,7 +232,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
-				    GFP_KERNEL, cpu_to_node(i));
+				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
 		if (!page)
 			goto free_pages;
 		list_add(&page->list, &pages);
-- 
cgit v1.2.3-55-g7522


From 6450c1d3213e27b0dcbf34cce7ad1ae74244c520 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 2 Oct 2008 19:23:04 -0400
Subject: ftrace: move pc counter in irqtrace

The assigning of the pc counter is in the wrong spot in the
check_critical_timing function. The pc variable is used in the
out jump.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_irqsoff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index f925dbbff2a6..a7db7f040ae0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -142,11 +142,11 @@ check_critical_timing(struct trace_array *tr,
 
 	local_save_flags(flags);
 
+	pc = preempt_count();
+
 	if (!report_latency(delta))
 		goto out;
 
-	pc = preempt_count();
-
 	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
-- 
cgit v1.2.3-55-g7522


From 097d036a2f25eecc42435c57e010aaf4a2eed2d9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 3 Oct 2008 15:39:21 +0200
Subject: tracing/fastboot: only trace non-module initcalls

At this time, only built-in initcalls interest us.
We can't really produce a relevant graph if we include
the modules initcall too.

I had good results after this patch (see svg in attachment).

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    |  2 ++
 init/main.c               |  1 +
 kernel/trace/trace_boot.c | 11 ++++++++---
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ed53265d1f63..5812dba4ee24 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -225,9 +225,11 @@ struct boot_trace {
 #ifdef CONFIG_BOOT_TRACER
 extern void trace_boot(struct boot_trace *it, initcall_t fn);
 extern void start_boot_trace(void);
+extern void stop_boot_trace(void);
 #else
 static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
 static inline void start_boot_trace(void) { }
+static inline void stop_boot_trace(void) { }
 #endif
 
 
diff --git a/init/main.c b/init/main.c
index 8e96a0ef17f4..e7939de80f3e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -886,6 +886,7 @@ static int __init kernel_init(void * unused)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+	stop_boot_trace();
 	init_post();
 	return 0;
 }
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index b9dc2c0093ab..a7efe3559654 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -22,11 +22,16 @@ void start_boot_trace(void)
 	trace_boot_enabled = 1;
 }
 
-void stop_boot_trace(struct trace_array *tr)
+void stop_boot_trace(void)
 {
 	trace_boot_enabled = 0;
 }
 
+void reset_boot_trace(struct trace_array *tr)
+{
+	stop_boot_trace();
+}
+
 static void boot_trace_init(struct trace_array *tr)
 {
 	int cpu;
@@ -43,7 +48,7 @@ static void boot_trace_ctrl_update(struct trace_array *tr)
 	if (tr->ctrl)
 		start_boot_trace();
 	else
-		stop_boot_trace(tr);
+		stop_boot_trace();
 }
 
 static enum print_line_t initcall_print_line(struct trace_iterator *iter)
@@ -81,7 +86,7 @@ struct tracer boot_tracer __read_mostly =
 {
 	.name		= "initcall",
 	.init		= boot_trace_init,
-	.reset		= stop_boot_trace,
+	.reset		= reset_boot_trace,
 	.ctrl_update	= boot_trace_ctrl_update,
 	.print_line	= initcall_print_line,
 };
-- 
cgit v1.2.3-55-g7522


From 6f807acd27734197b11d42829d3cbb9c0937b572 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Sat, 4 Oct 2008 02:00:58 -0400
Subject: ring-buffer: move page indexes into page headers

Remove the global head and tail indexes and move them into the
page header. Each page will now keep track of where the last
write and read was made. We also rename the head and tail to read
and write for better clarification.

This patch is needed for future enhancements to move the ring buffer
to a lockless solution.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 75 +++++++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6b8dac02364f..09d4f0d879a7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -117,6 +117,8 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 struct buffer_page {
 	u64		 time_stamp;	/* page time stamp */
 	unsigned	 size;		/* size of page data */
+	unsigned	 write;		/* index for next write */
+	unsigned	 read;		/* index for next read */
 	struct list_head list;		/* list of free pages */
 	void *page;			/* Actual data page */
 };
@@ -153,11 +155,8 @@ struct ring_buffer_per_cpu {
 	spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		pages;
-	unsigned long			head;	/* read from head */
-	unsigned long			tail;	/* write to tail */
-	unsigned long			reader;
-	struct buffer_page		*head_page;
-	struct buffer_page		*tail_page;
+	struct buffer_page		*head_page;	/* read from head */
+	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
@@ -566,10 +565,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+	return cpu_buffer->reader_page->read == cpu_buffer->reader_page->size &&
 		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
 		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
-		  cpu_buffer->head == cpu_buffer->tail)));
+		  cpu_buffer->head_page->read ==
+		  cpu_buffer->tail_page->write));
 }
 
 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -577,7 +577,7 @@ static inline int rb_null_event(struct ring_buffer_event *event)
 	return event->type == RINGBUF_TYPE_PADDING;
 }
 
-static inline void *rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
 {
 	return page->page + index;
 }
@@ -585,15 +585,21 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 static inline struct ring_buffer_event *
 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->reader_page,
-			     cpu_buffer->reader);
+	return __rb_page_index(cpu_buffer->reader_page,
+			       cpu_buffer->reader_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return __rb_page_index(cpu_buffer->head_page,
+			       cpu_buffer->head_page->read);
 }
 
 static inline struct ring_buffer_event *
 rb_iter_head_event(struct ring_buffer_iter *iter)
 {
-	return rb_page_index(iter->head_page,
-			     iter->head);
+	return __rb_page_index(iter->head_page, iter->head);
 }
 
 /*
@@ -610,7 +616,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 	for (head = 0; head < rb_head_size(cpu_buffer);
 	     head += rb_event_length(event)) {
 
-		event = rb_page_index(cpu_buffer->head_page, head);
+		event = __rb_page_index(cpu_buffer->head_page, head);
 		BUG_ON(rb_null_event(event));
 		/* Only count data entries */
 		if (event->type != RINGBUF_TYPE_DATA)
@@ -640,13 +646,13 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 
 static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->head = 0;
+	cpu_buffer->head_page->read = 0;
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
-	cpu_buffer->reader = 0;
+	cpu_buffer->reader_page->read = 0;
 }
 
 static inline void rb_inc_iter(struct ring_buffer_iter *iter)
@@ -743,9 +749,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 
-	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	tail = cpu_buffer->tail;
+	tail = cpu_buffer->tail_page->write;
 
 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
@@ -774,7 +779,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		}
 
 		if (tail != BUF_PAGE_SIZE) {
-			event = rb_page_index(tail_page, tail);
+			event = __rb_page_index(tail_page, tail);
 			/* page padding */
 			event->type = RINGBUF_TYPE_PADDING;
 		}
@@ -784,14 +789,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		tail_page->size = 0;
 		tail = 0;
 		cpu_buffer->tail_page = tail_page;
-		cpu_buffer->tail = tail;
+		cpu_buffer->tail_page->write = tail;
 		rb_add_stamp(cpu_buffer, ts);
 		spin_unlock(&cpu_buffer->lock);
 	}
 
 	BUG_ON(tail + length > BUF_PAGE_SIZE);
 
-	event = rb_page_index(tail_page, tail);
+	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
 	return event;
@@ -823,12 +828,12 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return -1;
 
 	/* check to see if we went to the next page */
-	if (cpu_buffer->tail) {
+	if (cpu_buffer->tail_page->write) {
 		/* Still on same page, update timestamp */
 		event->time_delta = *delta & TS_MASK;
 		event->array[0] = *delta >> TS_SHIFT;
 		/* commit the time event */
-		cpu_buffer->tail +=
+		cpu_buffer->tail_page->write +=
 			rb_event_length(event);
 		cpu_buffer->write_stamp = *ts;
 		*delta = 0;
@@ -846,7 +851,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
-	if (cpu_buffer->tail) {
+	if (cpu_buffer->tail_page->write) {
 		delta = ts - cpu_buffer->write_stamp;
 
 		if (test_time_stamp(delta)) {
@@ -868,7 +873,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		return NULL;
 
 	/* If the reserve went to the next page, our delta is zero */
-	if (!cpu_buffer->tail)
+	if (!cpu_buffer->tail_page->write)
 		delta = 0;
 
 	event->time_delta = delta;
@@ -933,8 +938,8 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->tail += rb_event_length(event);
-	cpu_buffer->tail_page->size = cpu_buffer->tail;
+	cpu_buffer->tail_page->write += rb_event_length(event);
+	cpu_buffer->tail_page->size = cpu_buffer->tail_page->write;
 	cpu_buffer->write_stamp += event->time_delta;
 	cpu_buffer->entries++;
 }
@@ -1178,10 +1183,10 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 	/* Iterator usage is expected to have record disabled */
 	if (list_empty(&cpu_buffer->reader_page->list)) {
 		iter->head_page = cpu_buffer->head_page;
-		iter->head = cpu_buffer->head;
+		iter->head = cpu_buffer->head_page->read;
 	} else {
 		iter->head_page = cpu_buffer->reader_page;
-		iter->head = cpu_buffer->reader;
+		iter->head = cpu_buffer->reader_page->read;
 	}
 	if (iter->head)
 		iter->read_stamp = cpu_buffer->read_stamp;
@@ -1200,7 +1205,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 	cpu_buffer = iter->cpu_buffer;
 
 	return iter->head_page == cpu_buffer->tail_page &&
-		iter->head == cpu_buffer->tail;
+		iter->head == cpu_buffer->tail_page->write;
 }
 
 static void
@@ -1277,11 +1282,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
-	if (cpu_buffer->reader < reader->size)
+	if (cpu_buffer->reader_page->read < reader->size)
 		goto out;
 
 	/* Never should we have an index greater than the size */
-	WARN_ON(cpu_buffer->reader > reader->size);
+	WARN_ON(cpu_buffer->reader_page->read > reader->size);
 
 	/* check if we caught up to the tail */
 	reader = NULL;
@@ -1342,7 +1347,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_update_read_stamp(cpu_buffer, event);
 
 	length = rb_event_length(event);
-	cpu_buffer->reader += length;
+	cpu_buffer->reader_page->read += length;
 }
 
 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1373,7 +1378,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * at the tail of the buffer.
 	 */
 	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
-	       (iter->head + length > cpu_buffer->tail));
+	       (iter->head + length > cpu_buffer->tail_page->write));
 
 	rb_update_iter_read_stamp(iter, event);
 
@@ -1623,7 +1628,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	cpu_buffer->reader_page->size = 0;
 
-	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;
+	cpu_buffer->head_page->read = 0;
+	cpu_buffer->tail_page->write = 0;
+	cpu_buffer->reader_page->read = 0;
 
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
-- 
cgit v1.2.3-55-g7522


From bf41a158cacba6ca5fc6407a54e7ad8ce1567e2e Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Sat, 4 Oct 2008 02:00:59 -0400
Subject: ring-buffer: make reentrant

This patch replaces the local_irq_save/restore with preempt_disable/
enable. This allows for interrupts to enter while recording.
To write to the ring buffer, you must reserve data, and then
commit it. During this time, an interrupt may call a trace function
that will also record into the buffer before the commit is made.

The interrupt will reserve its entry after the first entry, even
though the first entry did not finish yet.

The time stamp delta of the interrupt entry will be zero, since
in the view of the trace, the interrupt happened during the
first field anyway.

Locking still takes place when the tail/write moves from one page
to the next. The reader always takes the locks.

A new page pointer is added, called the commit. The write/tail will
always point to the end of all entries. The commit field will
point to the last committed entry. Only this commit entry may
update the write time stamp.

The reader can only go up to the commit. It cannot go past it.

If a lot of interrupts come in during a commit that fills up the
buffer, and it happens to make it all the way around the buffer
back to the commit, then a warning is printed and new events will
be dropped.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 487 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 374 insertions(+), 113 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 09d4f0d879a7..94af1fe56bb4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -116,8 +116,8 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
  */
 struct buffer_page {
 	u64		 time_stamp;	/* page time stamp */
-	unsigned	 size;		/* size of page data */
-	unsigned	 write;		/* index for next write */
+	local_t		 write;		/* index for next write */
+	local_t		 commit;	/* write commited index */
 	unsigned	 read;		/* index for next read */
 	struct list_head list;		/* list of free pages */
 	void *page;			/* Actual data page */
@@ -157,6 +157,7 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
+	struct buffer_page		*commit_page;	/* commited pages */
 	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
@@ -185,12 +186,32 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 };
 
-#define RB_WARN_ON(buffer, cond)			\
-	if (unlikely(cond)) {				\
-		atomic_inc(&buffer->record_disabled);	\
-		WARN_ON(1);				\
-		return -1;				\
-	}
+#define RB_WARN_ON(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_RET(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+			return -1;				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_ONCE(buffer, cond)				\
+	do {							\
+		static int once;				\
+		if (unlikely(cond) && !once) {			\
+			once++;					\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
 
 /**
  * check_pages - integrity check of buffer pages
@@ -204,22 +225,19 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
-	RB_WARN_ON(cpu_buffer, head->next->prev != head);
-	RB_WARN_ON(cpu_buffer, head->prev->next != head);
+	RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
+	RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
 
 	list_for_each_entry_safe(page, tmp, head, list) {
-		RB_WARN_ON(cpu_buffer, page->list.next->prev != &page->list);
-		RB_WARN_ON(cpu_buffer, page->list.prev->next != &page->list);
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.next->prev != &page->list);
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.prev->next != &page->list);
 	}
 
 	return 0;
 }
 
-static unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	return cpu_buffer->head_page->size;
-}
-
 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 			     unsigned nr_pages)
 {
@@ -286,7 +304,6 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	page->page = (void *)addr;
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
-	cpu_buffer->reader_page->size = 0;
 
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
@@ -294,8 +311,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
 
 	return cpu_buffer;
 
@@ -563,15 +579,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	return -ENOMEM;
 }
 
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	return cpu_buffer->reader_page->read == cpu_buffer->reader_page->size &&
-		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
-		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
-		  cpu_buffer->head_page->read ==
-		  cpu_buffer->tail_page->write));
-}
-
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
 	return event->type == RINGBUF_TYPE_PADDING;
@@ -602,6 +609,33 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	return __rb_page_index(iter->head_page, iter->head);
 }
 
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+	return local_read(&bpage->write);
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+	return local_read(&bpage->commit);
+}
+
+/* Size is determined by what has been commited */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+	return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->commit_page);
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return rb_page_commit(cpu_buffer->head_page);
+}
+
 /*
  * When the tail hits the head and the buffer is in overwrite mode,
  * the head jumps to the next page and all content on the previous
@@ -637,16 +671,76 @@ static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 	*page = list_entry(p, struct buffer_page, list);
 }
 
-static inline void
-rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
 {
-	cpu_buffer->tail_page->time_stamp = *ts;
-	cpu_buffer->write_stamp = *ts;
+	unsigned long addr = (unsigned long)event;
+
+	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+	     struct ring_buffer_event *event)
 {
-	cpu_buffer->head_page->read = 0;
+	unsigned long addr = (unsigned long)event;
+	unsigned long index;
+
+	index = rb_event_index(event);
+	addr &= PAGE_MASK;
+
+	return cpu_buffer->commit_page->page == (void *)addr &&
+		rb_commit_index(cpu_buffer) == index;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+		    struct ring_buffer_event *event)
+{
+	unsigned long addr = (unsigned long)event;
+	unsigned long index;
+
+	index = rb_event_index(event);
+	addr &= PAGE_MASK;
+
+	while (cpu_buffer->commit_page->page != (void *)addr) {
+		RB_WARN_ON(cpu_buffer,
+			   cpu_buffer->commit_page == cpu_buffer->tail_page);
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+	}
+
+	/* Now set the commit to the event's index */
+	local_set(&cpu_buffer->commit_page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	/*
+	 * We only race with interrupts and NMIs on this CPU.
+	 * If we own the commit event, then we can commit
+	 * all others that interrupted us, since the interruptions
+	 * are in stack format (they finish before they come
+	 * back to us). This allows us to do a simple loop to
+	 * assign the commit to the tail.
+	 */
+	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+		cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+		/* add barrier to keep gcc from optimizing too much */
+		barrier();
+	}
+	while (rb_commit_index(cpu_buffer) !=
+	       rb_page_write(cpu_buffer->commit_page)) {
+		cpu_buffer->commit_page->commit =
+			cpu_buffer->commit_page->write;
+		barrier();
+	}
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
@@ -745,61 +839,120 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
 	struct buffer_page *tail_page, *head_page, *reader_page;
-	unsigned long tail;
+	unsigned long tail, write;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
+	unsigned long flags;
 
 	tail_page = cpu_buffer->tail_page;
-	tail = cpu_buffer->tail_page->write;
+	write = local_add_return(length, &tail_page->write);
+	tail = write - length;
 
-	if (tail + length > BUF_PAGE_SIZE) {
+	/* See if we shot pass the end of this buffer page */
+	if (write > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
 
-		spin_lock(&cpu_buffer->lock);
+		spin_lock_irqsave(&cpu_buffer->lock, flags);
+
 		rb_inc_page(cpu_buffer, &next_page);
 
 		head_page = cpu_buffer->head_page;
 		reader_page = cpu_buffer->reader_page;
 
 		/* we grabbed the lock before incrementing */
-		WARN_ON(next_page == reader_page);
+		RB_WARN_ON(cpu_buffer, next_page == reader_page);
+
+		/*
+		 * If for some reason, we had an interrupt storm that made
+		 * it all the way around the buffer, bail, and warn
+		 * about it.
+		 */
+		if (unlikely(next_page == cpu_buffer->commit_page)) {
+			WARN_ON_ONCE(1);
+			goto out_unlock;
+		}
 
 		if (next_page == head_page) {
 			if (!(buffer->flags & RB_FL_OVERWRITE)) {
-				spin_unlock(&cpu_buffer->lock);
-				return NULL;
+				/* reset write */
+				if (tail <= BUF_PAGE_SIZE)
+					local_set(&tail_page->write, tail);
+				goto out_unlock;
 			}
 
-			/* count overflows */
-			rb_update_overflow(cpu_buffer);
+			/* tail_page has not moved yet? */
+			if (tail_page == cpu_buffer->tail_page) {
+				/* count overflows */
+				rb_update_overflow(cpu_buffer);
+
+				rb_inc_page(cpu_buffer, &head_page);
+				cpu_buffer->head_page = head_page;
+				cpu_buffer->head_page->read = 0;
+			}
+		}
 
-			rb_inc_page(cpu_buffer, &head_page);
-			cpu_buffer->head_page = head_page;
-			rb_reset_head_page(cpu_buffer);
+		/*
+		 * If the tail page is still the same as what we think
+		 * it is, then it is up to us to update the tail
+		 * pointer.
+		 */
+		if (tail_page == cpu_buffer->tail_page) {
+			local_set(&next_page->write, 0);
+			local_set(&next_page->commit, 0);
+			cpu_buffer->tail_page = next_page;
+
+			/* reread the time stamp */
+			*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+			cpu_buffer->tail_page->time_stamp = *ts;
 		}
 
-		if (tail != BUF_PAGE_SIZE) {
+		/*
+		 * The actual tail page has moved forward.
+		 */
+		if (tail < BUF_PAGE_SIZE) {
+			/* Mark the rest of the page with padding */
 			event = __rb_page_index(tail_page, tail);
-			/* page padding */
 			event->type = RINGBUF_TYPE_PADDING;
 		}
 
-		tail_page->size = tail;
-		tail_page = next_page;
-		tail_page->size = 0;
-		tail = 0;
-		cpu_buffer->tail_page = tail_page;
-		cpu_buffer->tail_page->write = tail;
-		rb_add_stamp(cpu_buffer, ts);
-		spin_unlock(&cpu_buffer->lock);
+		if (tail <= BUF_PAGE_SIZE)
+			/* Set the write back to the previous setting */
+			local_set(&tail_page->write, tail);
+
+		/*
+		 * If this was a commit entry that failed,
+		 * increment that too
+		 */
+		if (tail_page == cpu_buffer->commit_page &&
+		    tail == rb_commit_index(cpu_buffer)) {
+			rb_set_commit_to_write(cpu_buffer);
+		}
+
+		spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+		/* fail and let the caller try again */
+		return ERR_PTR(-EAGAIN);
 	}
 
-	BUG_ON(tail + length > BUF_PAGE_SIZE);
+	/* We reserved something on the buffer */
+
+	BUG_ON(write > BUF_PAGE_SIZE);
 
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
+	/*
+	 * If this is a commit and the tail is zero, then update
+	 * this page's time stamp.
+	 */
+	if (!tail && rb_is_commit(cpu_buffer, event))
+		cpu_buffer->commit_page->time_stamp = *ts;
+
 	return event;
+
+ out_unlock:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+	return NULL;
 }
 
 static int
@@ -808,6 +961,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct ring_buffer_event *event;
 	static int once;
+	int ret;
 
 	if (unlikely(*delta > (1ULL << 59) && !once++)) {
 		printk(KERN_WARNING "Delta way too big! %llu"
@@ -825,21 +979,38 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 				  RB_LEN_TIME_EXTEND,
 				  ts);
 	if (!event)
-		return -1;
+		return -EBUSY;
 
-	/* check to see if we went to the next page */
-	if (cpu_buffer->tail_page->write) {
-		/* Still on same page, update timestamp */
-		event->time_delta = *delta & TS_MASK;
-		event->array[0] = *delta >> TS_SHIFT;
-		/* commit the time event */
-		cpu_buffer->tail_page->write +=
-			rb_event_length(event);
+	if (PTR_ERR(event) == -EAGAIN)
+		return -EAGAIN;
+
+	/* Only a commited time event can update the write stamp */
+	if (rb_is_commit(cpu_buffer, event)) {
+		/*
+		 * If this is the first on the page, then we need to
+		 * update the page itself, and just put in a zero.
+		 */
+		if (rb_event_index(event)) {
+			event->time_delta = *delta & TS_MASK;
+			event->array[0] = *delta >> TS_SHIFT;
+		} else {
+			cpu_buffer->commit_page->time_stamp = *ts;
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		cpu_buffer->write_stamp = *ts;
-		*delta = 0;
+		/* let the caller know this was the commit */
+		ret = 1;
+	} else {
+		/* Darn, this is just wasted space */
+		event->time_delta = 0;
+		event->array[0] = 0;
+		ret = 0;
 	}
 
-	return 0;
+	*delta = 0;
+
+	return ret;
 }
 
 static struct ring_buffer_event *
@@ -848,32 +1019,69 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct ring_buffer_event *event;
 	u64 ts, delta;
+	int commit = 0;
 
+ again:
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
-	if (cpu_buffer->tail_page->write) {
+	/*
+	 * Only the first commit can update the timestamp.
+	 * Yes there is a race here. If an interrupt comes in
+	 * just after the conditional and it traces too, then it
+	 * will also check the deltas. More than one timestamp may
+	 * also be made. But only the entry that did the actual
+	 * commit will be something other than zero.
+	 */
+	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+	    rb_page_write(cpu_buffer->tail_page) ==
+	    rb_commit_index(cpu_buffer)) {
+
 		delta = ts - cpu_buffer->write_stamp;
 
+		/* make sure this delta is calculated here */
+		barrier();
+
+		/* Did the write stamp get updated already? */
+		if (unlikely(ts < cpu_buffer->write_stamp))
+			goto again;
+
 		if (test_time_stamp(delta)) {
-			int ret;
 
-			ret = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-			if (ret < 0)
+			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+			if (commit == -EBUSY)
 				return NULL;
+
+			if (commit == -EAGAIN)
+				goto again;
+
+			RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else {
-		spin_lock(&cpu_buffer->lock);
-		rb_add_stamp(cpu_buffer, &ts);
-		spin_unlock(&cpu_buffer->lock);
+	} else
+		/* Non commits have zero deltas */
 		delta = 0;
-	}
 
 	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-	if (!event)
+	if (PTR_ERR(event) == -EAGAIN)
+		goto again;
+
+	if (!event) {
+		if (unlikely(commit))
+			/*
+			 * Ouch! We needed a timestamp and it was commited. But
+			 * we didn't get our event reserved.
+			 */
+			rb_set_commit_to_write(cpu_buffer);
 		return NULL;
+	}
 
-	/* If the reserve went to the next page, our delta is zero */
-	if (!cpu_buffer->tail_page->write)
+	/*
+	 * If the timestamp was commited, make the commit our entry
+	 * now so that we will update it when needed.
+	 */
+	if (commit)
+		rb_set_commit_event(cpu_buffer, event);
+	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
 
 	event->time_delta = delta;
@@ -881,6 +1089,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static DEFINE_PER_CPU(int, rb_need_resched);
+
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -904,12 +1114,15 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	int cpu;
+	int cpu, resched;
 
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
-	local_irq_save(*flags);
+	/* If we are tracing schedule, we don't want to recurse */
+	resched = need_resched();
+	preempt_disable_notrace();
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -922,26 +1135,42 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
-		return NULL;
+		goto out;
 
 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
 		goto out;
 
+	/*
+	 * Need to store resched state on this cpu.
+	 * Only the first needs to.
+	 */
+
+	if (preempt_count() == 1)
+		per_cpu(rb_need_resched, cpu) = resched;
+
 	return event;
 
  out:
-	local_irq_restore(*flags);
+	if (resched)
+		preempt_enable_notrace();
+	else
+		preempt_enable_notrace();
 	return NULL;
 }
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->tail_page->write += rb_event_length(event);
-	cpu_buffer->tail_page->size = cpu_buffer->tail_page->write;
-	cpu_buffer->write_stamp += event->time_delta;
 	cpu_buffer->entries++;
+
+	/* Only process further if we own the commit */
+	if (!rb_is_commit(cpu_buffer, event))
+		return;
+
+	cpu_buffer->write_stamp += event->time_delta;
+
+	rb_set_commit_to_write(cpu_buffer);
 }
 
 /**
@@ -965,7 +1194,16 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
-	local_irq_restore(flags);
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1) {
+		if (per_cpu(rb_need_resched, cpu))
+			preempt_enable_no_resched_notrace();
+		else
+			preempt_enable_notrace();
+	} else
+		preempt_enable_no_resched_notrace();
 
 	return 0;
 }
@@ -989,15 +1227,17 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length, flags;
+	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
-	int cpu;
+	int cpu, resched;
 
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
-	local_irq_save(flags);
+	resched = need_resched();
+	preempt_disable_notrace();
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
@@ -1022,11 +1262,26 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	local_irq_restore(flags);
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
 
 	return ret;
 }
 
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *reader = cpu_buffer->reader_page;
+	struct buffer_page *head = cpu_buffer->head_page;
+	struct buffer_page *commit = cpu_buffer->commit_page;
+
+	return reader->read == rb_page_commit(reader) &&
+		(commit == reader ||
+		 (commit == head &&
+		  head->read == rb_page_commit(commit)));
+}
+
 /**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
@@ -1204,8 +1459,8 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 
 	cpu_buffer = iter->cpu_buffer;
 
-	return iter->head_page == cpu_buffer->tail_page &&
-		iter->head == cpu_buffer->tail_page->write;
+	return iter->head_page == cpu_buffer->commit_page &&
+		iter->head == rb_commit_index(cpu_buffer);
 }
 
 static void
@@ -1282,15 +1537,16 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
-	if (cpu_buffer->reader_page->read < reader->size)
+	if (cpu_buffer->reader_page->read < rb_page_size(reader))
 		goto out;
 
 	/* Never should we have an index greater than the size */
-	WARN_ON(cpu_buffer->reader_page->read > reader->size);
+	RB_WARN_ON(cpu_buffer,
+		   cpu_buffer->reader_page->read > rb_page_size(reader));
 
 	/* check if we caught up to the tail */
 	reader = NULL;
-	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
 		goto out;
 
 	/*
@@ -1301,7 +1557,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	reader = cpu_buffer->head_page;
 	cpu_buffer->reader_page->list.next = reader->list.next;
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
-	cpu_buffer->reader_page->size = 0;
+
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
 
 	/* Make the reader page now replace the head */
 	reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1313,7 +1571,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 */
 	cpu_buffer->head_page = cpu_buffer->reader_page;
 
-	if (cpu_buffer->tail_page != reader)
+	if (cpu_buffer->commit_page != reader)
 		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
 
 	/* Finally update the reader page to the new head */
@@ -1363,8 +1621,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/*
 	 * Check if we are at the end of the buffer.
 	 */
-	if (iter->head >= iter->head_page->size) {
-		BUG_ON(iter->head_page == cpu_buffer->tail_page);
+	if (iter->head >= rb_page_size(iter->head_page)) {
+		BUG_ON(iter->head_page == cpu_buffer->commit_page);
 		rb_inc_iter(iter);
 		return;
 	}
@@ -1377,16 +1635,16 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * This should not be called to advance the header if we are
 	 * at the tail of the buffer.
 	 */
-	BUG_ON((iter->head_page == cpu_buffer->tail_page) &&
-	       (iter->head + length > cpu_buffer->tail_page->write));
+	BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
+	       (iter->head + length > rb_commit_index(cpu_buffer)));
 
 	rb_update_iter_read_stamp(iter, event);
 
 	iter->head += length;
 
 	/* check for end of page padding */
-	if ((iter->head >= iter->head_page->size) &&
-	    (iter->head_page != cpu_buffer->tail_page))
+	if ((iter->head >= rb_page_size(iter->head_page)) &&
+	    (iter->head_page != cpu_buffer->commit_page))
 		rb_advance_iter(iter);
 }
 
@@ -1420,7 +1678,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		WARN_ON(1);
+		RB_WARN_ON(cpu_buffer, 1);
 		rb_advance_reader(cpu_buffer);
 		return NULL;
 
@@ -1622,14 +1880,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->head_page->size = 0;
-	cpu_buffer->tail_page = cpu_buffer->head_page;
-	cpu_buffer->tail_page->size = 0;
-	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
-	cpu_buffer->reader_page->size = 0;
+	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->commit, 0);
 
 	cpu_buffer->head_page->read = 0;
-	cpu_buffer->tail_page->write = 0;
+
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->commit_page = cpu_buffer->head_page;
+
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
 	cpu_buffer->overrun = 0;
-- 
cgit v1.2.3-55-g7522


From 3ea2e6d71aafe35b8aaf89ed711a283815acfae6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Sat, 4 Oct 2008 02:01:00 -0400
Subject: ftrace: make some tracers reentrant

Now that the ring buffer is reentrant, some of the ftrace tracers
(sched_swich, debugging traces) can also be reentrant.

Note: Never make the function tracer reentrant, that can cause
  recursion problems all over the kernel. The function tracer
  must disable reentrancy.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 10 ++--------
 kernel/trace/trace_sched_switch.c | 10 ++--------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1cd2e8143bb4..caa4051ce778 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -839,7 +839,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
-	long disabled;
 	int cpu;
 	int pc;
 
@@ -850,12 +849,10 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
-	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 }
 
@@ -2961,7 +2958,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	struct trace_array_cpu *data;
 	struct print_entry *entry;
 	unsigned long flags, irq_flags;
-	long disabled;
 	int cpu, len = 0, size, pc;
 
 	if (!tr->ctrl || tracing_disabled)
@@ -2971,9 +2967,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (unlikely(disabled != 1))
+	if (unlikely(atomic_read(&data->disabled)))
 		goto out;
 
 	spin_lock_irqsave(&trace_buf_lock, flags);
@@ -2999,7 +2994,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	spin_unlock_irqrestore(&trace_buf_lock, flags);
 
  out:
-	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 
 	return len;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index c7fa08a5b7f4..b8f56beb1a62 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -24,7 +24,6 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
 	int cpu;
 	int pc;
 
@@ -41,12 +40,10 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
@@ -55,7 +52,6 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
 	int cpu, pc;
 
 	if (!likely(tracer_enabled))
@@ -67,13 +63,11 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
 
-	if (likely(disabled == 1))
+	if (likely(!atomic_read(&data->disabled)))
 		tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
 					   flags, pc);
 
-	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3-55-g7522


From c2931e05ec5965597cbfb79ad332d4a29aeceb23 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 4 Oct 2008 22:04:44 +0200
Subject: ftrace: return an error when setting a nonexistent tracer

When one try to set a nonexistent tracer, no error is returned
as if the name of the tracer was correct.
We should return -EINVAL.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index caa4051ce778..78d56614c95b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2381,9 +2381,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	struct tracer *t;
 	char buf[max_tracer_type_len+1];
 	int i;
+	size_t ret;
 
 	if (cnt > max_tracer_type_len)
 		cnt = max_tracer_type_len;
+	ret = cnt;
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
@@ -2399,7 +2401,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 		if (strcmp(t->name, buf) == 0)
 			break;
 	}
-	if (!t || t == current_trace)
+	if (!t) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (t == current_trace)
 		goto out;
 
 	if (current_trace && current_trace->reset)
@@ -2412,9 +2418,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&trace_types_lock);
 
-	filp->f_pos += cnt;
+	if (ret == cnt)
+		filp->f_pos += cnt;
 
-	return cnt;
+	return ret;
 }
 
 static ssize_t
-- 
cgit v1.2.3-55-g7522


From 8a5d900cca57115326bc5b9e7f3bac980986c2c8 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Sat, 4 Oct 2008 13:42:27 -0700
Subject: tracing/fastboot: fix printk format typo in boot tracer

When printing nanoseconds, the right printk format string is %09 not %06...

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Frédéric Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_boot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a7efe3559654..d0a5e50eeff2 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -62,14 +62,14 @@ static enum print_line_t initcall_print_line(struct trace_iterator *iter)
 	struct timespec rettime = ktime_to_timespec(it->rettime);
 
 	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "[%5ld.%06ld] calling  %s @ %i\n",
+		ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
 					  calltime.tv_sec,
 					  calltime.tv_nsec,
 					  it->func, it->caller);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, "[%5ld.%06ld] initcall %s "
+		ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
 					  "returned %d after %lld msecs\n",
 					  rettime.tv_sec,
 					  rettime.tv_nsec,
-- 
cgit v1.2.3-55-g7522


From ddc7a01aad195708fc943d9446411d11e3547784 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 4 Oct 2008 21:35:48 +0200
Subject: tracing/fastboot: fix initcalls disposition in bootgraph.pl

When bootgraph.pl parses a file, it gives one row
for each initcall's pid. But only few of them will
be displayed => the longest.

This patch corrects it by giving only a rows for pids
which have initcalls that will be displayed.

[ mingo@elte.hu: resolved conflicts ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index d459b8bdef02..c34b359c34a3 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -37,12 +37,12 @@
 # 	dmesg | perl scripts/bootgraph.pl > output.svg
 #
 
-my @rows;
-my %start, %end, %row;
+my %start, %end;
 my $done = 0;
-my $rowcount = 0;
 my $maxtime = 0;
 my $count = 0;
+my %pids;
+
 while (<>) {
 	my $line = $_;
 	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
@@ -50,14 +50,8 @@ while (<>) {
 		if ($done == 0) {
 			$start{$func} = $1;
 		}
-		$row{$func} = 1;
 		if ($line =~ /\@ ([0-9]+)/) {
-			my $pid = $1;
-			if (!defined($rows[$pid])) {
-				$rowcount = $rowcount + 1;
-				$rows[$pid] = $rowcount;
-			}
-			$row{$func} = $rows[$pid];
+			$pids{$func} = $1;
 		}
 		$count = $count + 1;
 	}
@@ -102,17 +96,25 @@ $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(
 my $mult = 950.0 / $maxtime;
 my $threshold = 0.0500 / $maxtime;
 my $stylecounter = 0;
+my %rows;
+my $rowscount = 1;
 while (($key,$value) = each %start) {
 	my $duration = $end{$key} - $start{$key};
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
-		$s = $value * $mult;
+		$pid = $pids{$key};
+
+		if (!defined($rows{$pid})) {
+			$rows{$pid} = $rowscount;
+			$rowscount = $rowscount + 1;
+		}
+		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
 		$e = $end{$key} * $mult;
 		$w = $e - $s;
 
-		$y = $row{$key} * 150;
+		$y = $rows{$pid} * 150;
 		$y2 = $y + 4;
 
 		$style = $styles[$stylecounter];
-- 
cgit v1.2.3-55-g7522


From 2fbc474901933c8f0c09b0280dfbb6780cb8bd60 Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Wed, 8 Oct 2008 16:51:49 -0700
Subject: ftrace: fix hex output mode of ftrace

Fix the output of ftrace in hex mode as the hi/lo nibbles are output in
reverse order. Without this patch, the output of ftrace is:

raw mode : 6474 0 141531612444 0 140 + 6402 120 S
hex mode : 000091a4 00000000 000000023f1f50c1 00000000 c8 000000b2 00009120 87 ffff00c8 00000035

There is an inversion on ouput hex(6474) is 194a

[based on a patch by Philippe Reynes <tremyfr@yahoo.fr>]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78d56614c95b..36cbb873845f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -336,14 +336,12 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
 }
 
 #define HEX_CHARS 17
-static const char hex2asc[] = "0123456789abcdef";
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 {
 	unsigned char hex[HEX_CHARS];
 	unsigned char *data = mem;
-	unsigned char byte;
 	int i, j;
 
 	BUG_ON(len >= HEX_CHARS);
@@ -353,10 +351,8 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 #else
 	for (i = len-1, j = 0; i >= 0; i--) {
 #endif
-		byte = data[i];
-
-		hex[j++] = hex2asc[byte & 0x0f];
-		hex[j++] = hex2asc[byte >> 4];
+		hex[j++] = hex_asc_hi(data[i]);
+		hex[j++] = hex_asc_lo(data[i]);
 	}
 	hex[j++] = ' ';
 
-- 
cgit v1.2.3-55-g7522


From ad0a3b68114e8f3c25ac0045b45a2838f23e3b3a Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Wed, 8 Oct 2008 17:44:55 -0700
Subject: trace: add build-time check to avoid overrunning hex buffer

Remove the runtime BUG_ON and change to a compile-time check in
the macro that calls the hex format routine

[Noticed by Joe Perches]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 36cbb873845f..d345d649d073 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -335,7 +335,8 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
 	return len;
 }
 
-#define HEX_CHARS 17
+#define MAX_MEMHEX_BYTES	8
+#define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
@@ -344,8 +345,6 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 	unsigned char *data = mem;
 	int i, j;
 
-	BUG_ON(len >= HEX_CHARS);
-
 #ifdef __BIG_ENDIAN
 	for (i = 0, j = 0; i < len; i++) {
 #else
@@ -1668,6 +1667,7 @@ do {							\
 
 #define SEQ_PUT_HEX_FIELD_RET(s, x)			\
 do {							\
+	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
 	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
 		return 0;				\
 } while (0)
-- 
cgit v1.2.3-55-g7522


From ca538f6bbe583406f941f3041d40c41f9a13d1de Mon Sep 17 00:00:00 2001
From: Tim Bird
Date: Thu, 9 Oct 2008 15:23:05 -0700
Subject: tracing/fastboot: add better resolution to initcall debug/tracing

Change the time resolution for initcall_debug to microseconds, from
milliseconds.  This is handy to determine which initcalls you want to work
on for faster booting.

One one of my test machines, over 90% of the initcalls are less than a
millisecond and (without this patch) these are all reported as 0 msecs.
Working on the 900 us ones is more important than the 4 us ones.

With 'quiet' on the kernel command line, this adds no significant overhead
to kernel boot time.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 4 ++--
 init/main.c            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5812dba4ee24..a3d46151be19 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -215,9 +215,9 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 
 struct boot_trace {
 	pid_t			caller;
-	char 			func[KSYM_NAME_LEN];
+	char			func[KSYM_NAME_LEN];
 	int			result;
-	unsigned long long	duration;
+	unsigned long long	duration;		/* usecs */
 	ktime_t			calltime;
 	ktime_t			rettime;
 };
diff --git a/init/main.c b/init/main.c
index e7939de80f3e..b2e7ff4a5349 100644
--- a/init/main.c
+++ b/init/main.c
@@ -721,8 +721,8 @@ int do_one_initcall(initcall_t fn)
 	if (initcall_debug) {
 		it.rettime = ktime_get();
 		delta = ktime_sub(it.rettime, it.calltime);
-		it.duration = (unsigned long long) delta.tv64 >> 20;
-		printk("initcall %pF returned %d after %Ld msecs\n", fn,
+		it.duration = (unsigned long long) delta.tv64 >> 10;
+		printk("initcall %pF returned %d after %Ld usecs\n", fn,
 			it.result, it.duration);
 		trace_boot(&it, fn);
 	}
-- 
cgit v1.2.3-55-g7522


From bfadadfccc19e36f7d600c5ce7b3e5ba5197fbf0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 10 Oct 2008 03:48:25 -0400
Subject: markers: fix synchronize marker unregister static inline

Use a #define for synchronize marker unregister to fix include dependencies.

Fixes the slab circular inclusion which triggers when slab.git is combined
with tracing.git, where rcupdate includes slab, which includes markers
which includes rcupdate.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 38e32e781ed7..889196c7fbb1 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -13,7 +13,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/rcupdate.h>
 
 struct module;
 struct marker;
@@ -166,9 +165,6 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
  * unregistration and the end of module exit to make sure there is no caller
  * executing a probe when it is freed.
  */
-static inline void marker_synchronize_unregister(void)
-{
-	synchronize_sched();
-}
+#define marker_synchronize_unregister() synchronize_sched()
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 8b27386a9ce9c7f0f8702cff7565a46802ad57d1 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg
Date: Thu, 9 Oct 2008 22:19:08 -0400
Subject: ftrace: make ftrace_test_p6nop disassembler-friendly

Commit 4c3dc21b136f8cb4b72afee16c3ba7e961656c0b in tip introduced the
5-byte NOP ftrace_test_p6nop:

   jmp . + 5
   .byte 0x00, 0x00, 0x00

This is not friendly to disassemblers because an odd number of 0x00s
ends in the middle of an instruction boundary.  This changes the 0x00s
to 1-byte NOPs (0x90).

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 222507e8157b..d073d981a730 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -132,7 +132,9 @@ int __init ftrace_dyn_arch_init(void *data)
 		".section .text, \"ax\"\n"
 		"ftrace_test_jmp:"
 		"jmp ftrace_test_p6nop\n"
-		".byte 0x00,0x00,0x00\n"  /* 2 byte jmp + 3 bytes */
+		"nop\n"
+		"nop\n"
+		"nop\n"  /* 2 byte jmp + 3 bytes */
 		"ftrace_test_p6nop:"
 		P6_NOP5
 		"jmp 1f\n"
@@ -161,7 +163,7 @@ int __init ftrace_dyn_arch_init(void *data)
 		ftrace_nop = (unsigned long *)ftrace_test_nop5;
 		break;
 	case 2:
-		pr_info("ftrace: converting mcount calls to jmp 1f\n");
+		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
 		ftrace_nop = (unsigned long *)ftrace_test_jmp;
 		break;
 	}
-- 
cgit v1.2.3-55-g7522


From f2461fc82a083dd60062e05e704c5fcc1c658ba1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 6 Oct 2008 10:33:00 -0400
Subject: tracepoints: tracepoint_synchronize_unregister()

Create tracepoint_synchronize_unregister() which must be called before the end
of exit() to make sure every probe callers have exited the non preemptible
section and thus are not executing the probe code anymore.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index e623a6fca5c3..199f4c207c1e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -124,4 +124,11 @@ extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
 extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
 	struct tracepoint *begin, struct tracepoint *end);
 
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+#define tracepoint_synchronize_unregister() synchronize_sched()
+
 #endif
-- 
cgit v1.2.3-55-g7522


From 231375cc5cc3549bb413f94a164bdcbd5f9ce943 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Fri, 3 Oct 2008 15:01:33 -0400
Subject: tracepoints: synchronize unregister static inline

Turn tracepoint synchronize unregister into a static inline. There is no
reason to keep it as a macro over a static inline.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 199f4c207c1e..c5bb39c7a770 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -129,6 +129,9 @@ extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
  */
-#define tracepoint_synchronize_unregister() synchronize_sched()
+static inline void tracepoint_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
 
 #endif
-- 
cgit v1.2.3-55-g7522


From 80a398a55d3096ff4572b44271d095413749ebb4 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Sun, 14 Sep 2008 15:30:52 -0700
Subject: tracing/fastboot: fix issues and improve output of bootgraph.pl

David Sanders reported some issues with bootgraph.pl's display
of his sytems bootup; this commit fixes these by scaling the graph
not from 0 - end time but from the first initcall to the end time;
the minimum display size etc also now need to scale with this, as does
the axis display.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 scripts/bootgraph.pl | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index c34b359c34a3..4baf49985589 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -40,6 +40,7 @@
 my %start, %end;
 my $done = 0;
 my $maxtime = 0;
+my $firsttime = 100;
 my $count = 0;
 my %pids;
 
@@ -49,6 +50,9 @@ while (<>) {
 		my $func = $2;
 		if ($done == 0) {
 			$start{$func} = $1;
+			if ($1 < $firsttime) {
+				$firsttime = $1;
+			}
 		}
 		if ($line =~ /\@ ([0-9]+)/) {
 			$pids{$func} = $1;
@@ -65,6 +69,9 @@ while (<>) {
 	if ($line =~ /Write protecting the/) {
 		$done = 1;
 	}
+	if ($line =~ /Freeing unused kernel memory/) {
+		$done = 1;
+	}
 }
 
 if ($count == 0) {
@@ -93,8 +100,8 @@ $styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0
 $styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
 $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
 
-my $mult = 950.0 / $maxtime;
-my $threshold = 0.0500 / $maxtime;
+my $mult = 950.0 / ($maxtime - $firsttime);
+my $threshold = ($maxtime - $firsttime) / 60.0;
 my $stylecounter = 0;
 my %rows;
 my $rowscount = 1;
@@ -103,15 +110,9 @@ while (($key,$value) = each %start) {
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
-		$pid = $pids{$key};
-
-		if (!defined($rows{$pid})) {
-			$rows{$pid} = $rowscount;
-			$rowscount = $rowscount + 1;
-		}
 		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
-		$e = $end{$key} * $mult;
+		$e = ($end{$key} - $firsttime) * $mult;
 		$w = $e - $s;
 
 		$y = $rows{$pid} * 150;
@@ -130,11 +131,13 @@ while (($key,$value) = each %start) {
 
 
 # print the time line on top
-my $time = 0.0;
+my $time = $firsttime;
+my $step = ($maxtime - $firsttime) / 15;
 while ($time < $maxtime) {
-	my $s2 = $time * $mult;
-	print "<text transform=\"translate($s2,89) rotate(90)\">$time</text>\n";
-	$time = $time + 0.1;
+	my $s2 = ($time - $firsttime) * $mult;
+	my $tm = int($time * 100) / 100.0;
+	print "<text transform=\"translate($s2,89) rotate(90)\">$tm</text>\n";
+	$time = $time + $step;
 }
 
 print "</svg>\n";
-- 
cgit v1.2.3-55-g7522


From 5c542368a3ded88bed98239fb3570dda416203ee Mon Sep 17 00:00:00 2001
From: Arnaud Patard
Date: Fri, 19 Sep 2008 20:16:25 -0700
Subject: tracing/fastboot: fix bootgraph.pl initcall name regexp

The regexp used to match the start and the end of an initcall
are matching only on [a-zA-Z\_]. This rules out initcalls with
a number in them. This patch is fixing that.

Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 4baf49985589..479fb4ea8914 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -46,7 +46,7 @@ my %pids;
 
 while (<>) {
 	my $line = $_;
-	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z\_]+)\+/) {
+	if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z0-9\_]+)\+/) {
 		my $func = $2;
 		if ($done == 0) {
 			$start{$func} = $1;
@@ -60,7 +60,7 @@ while (<>) {
 		$count = $count + 1;
 	}
 
-	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z\_]+)\+.*returned/) {
+	if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_]+)\+.*returned/) {
 		if ($done == 0) {
 			$end{$2} = $1;
 			$maxtime = $1;
@@ -75,8 +75,8 @@ while (<>) {
 }
 
 if ($count == 0) {
-	print "No data found in the dmesg. Make sure CONFIG_PRINTK_TIME is enabled and\n";
-	print "that initcall_debug is passed on the kernel command line.\n\n";
+	print "No data found in the dmesg. Make sure that 'printk.time=1' and\n";
+	print "'initcall_debug' are passed on the kernel command line.\n\n";
 	print "Usage: \n";
 	print "      dmesg | perl scripts/bootgraph.pl > output.svg\n\n";
 	exit;
-- 
cgit v1.2.3-55-g7522


From 07d1890420cce95c577736e4d67f70cbd39845fe Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 4 Oct 2008 21:35:48 +0200
Subject: tracing/fastboot: fix initcalls disposition in bootgraph.pl

When bootgraph.pl parses a file, it gives one row
for each initcall's pid. But only few of them will
be displayed => the longest.

This patch corrects it by giving only a rows for pids
which have initcalls that will be displayed.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/bootgraph.pl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 479fb4ea8914..5e7316e5aa39 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -110,6 +110,12 @@ while (($key,$value) = each %start) {
 
 	if ($duration >= $threshold) {
 		my $s, $s2, $e, $y;
+		$pid = $pids{$key};
+
+		if (!defined($rows{$pid})) {
+			$rows{$pid} = $rowscount;
+			$rowscount = $rowscount + 1;
+		}
 		$s = ($value - $firsttime) * $mult;
 		$s2 = $s + 6;
 		$e = ($end{$key} - $firsttime) * $mult;
-- 
cgit v1.2.3-55-g7522


From 4519d9e54dcd273975ad0adebad2a08c20428029 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 14 Oct 2008 14:15:43 +0200
Subject: tracing/stacktrace: improve help text

Improve the help text that is displayed for CONFIG_STACK_TRACER.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 396aea11398e..11fd03a429f0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,8 +141,17 @@ config STACK_TRACER
 	select FTRACE
 	select STACKTRACE
 	help
-	  This tracer records the max stack of the kernel, and displays
-	  it in debugfs/tracing/stack_trace
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. Because this logic has to execute in every
+	  kernel function, all the time, this option can slow down the
+	  kernel measurably and is generally intended for kernel
+	  developers only.
+
+	  Say N if unsure.
 
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
-- 
cgit v1.2.3-55-g7522


From 98d9c66ab07471006fd7910cb16453581c41a3e7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 14 Oct 2008 14:27:20 +0200
Subject: tracing/fastboot: improve help text

Improve the help text of the boot tracer.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 11fd03a429f0..1cb3e1f616af 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -127,12 +127,17 @@ config BOOT_TRACER
 	select TRACING
 	help
 	  This tracer helps developers to optimize boot times: it records
-	  the timings of the initcalls. Its aim is to be parsed by the
-	  /scripts/bootgraph.pl tool to produce pretty graphics about
-	  boot inefficiencies, giving a visual representation of the
-	  delays during initcalls. Note that tracers self tests can't
-	  be enabled if this tracer is selected since only one tracer
-	  should touch the tracing buffer at a time.
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
 
 config STACK_TRACER
 	bool "Trace max stack"
-- 
cgit v1.2.3-55-g7522


From 7c2500f17d65092d93345f3996cf82ebca17e9ff Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck
Date: Wed, 15 Oct 2008 08:53:06 +0000
Subject: [WATCHDOG] ib700wdt.c - fix buffer_underflow bug

This fixes Bug 11399:
if ibwdt_set_heartbeat(int t) is called with value 30 then
the check "if ((t < 0) || (t > 30))" in ibwdt_set_heartbeat
is not going to fail because t == 30, but in the loop, the
check wd_times[i] > t is never going to be true because
none of the wd_times are greater than the value of t (i.e. 30).
So we are exiting the loop with i == -1 and therefore setting
wd_margin to -1 which is wrong.

Reported-by: Zvonimir Rakamaric <zrakamar@cs.ubc.ca>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/ib700wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 05a28106e8eb..8782ec1f5aa0 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -154,7 +154,7 @@ static int ibwdt_set_heartbeat(int t)
 		return -EINVAL;
 
 	for (i = 0x0F; i > -1; i--)
-		if (wd_times[i] > t)
+		if (wd_times[i] >= t)
 			break;
 	wd_margin = i;
 	return 0;
-- 
cgit v1.2.3-55-g7522


From 73bdf0a60e607f4b8ecc5aec597105976565a84f Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 15 Oct 2008 08:35:12 -0700
Subject: Introduce is_vmalloc_or_module_addr() and use with DEBUG_VIRTUAL

Impact: crash on module insertion with CONFIG_DEBUG_VIRTUAL

We would incorrectly BUG due to:

   VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
   	          !is_module_address(addr));

... because, at least on x86-64, is_module_address() doesn't do what
it should.  This patch introduces is_vmalloc_or_module_addr(), which
is what we really want anyway, and uses it instead.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 mm/vmalloc.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bba06c41fc59..f018d7e0addb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -168,6 +168,21 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
+static inline int is_vmalloc_or_module_addr(const void *x)
+{
+	/*
+	 * x86-64 and sparc64 put modules in a special place,
+	 * and fall back on vmalloc() if that fails. Others
+	 * just put it in the vmalloc space.
+	 */
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+	unsigned long addr = (unsigned long)x;
+	if (addr >= MODULES_VADDR && addr < MODULES_END)
+		return 1;
+#endif
+	return is_vmalloc_addr(x);
+}
+
 /*
  * Map a vmalloc()-space virtual address to the physical page.
  */
@@ -184,8 +199,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
 	 * architectures that do not vmalloc module space
 	 */
-	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
-			!is_module_address(addr));
+	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
 	if (!pgd_none(*pgd)) {
 		pud = pud_offset(pgd, addr);
-- 
cgit v1.2.3-55-g7522


From 463baa8a0947f858d6db1c56d87eeaf1176ba7bb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Thu, 16 Oct 2008 20:29:07 +1100
Subject: powerpc: fix linux-next build failure

Today's linux-next build (powerpc allyesconfig) failed like this:

In file included from arch/powerpc/include/asm/mmu-hash64.h:17,
                 from arch/powerpc/include/asm/mmu.h:8,
                 from arch/powerpc/include/asm/pgtable.h:8,
                 from arch/powerpc/mm/slb.c:20:
arch/powerpc/include/asm/page.h:76: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'memstart_addr'
arch/powerpc/include/asm/page.h:77: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'kernstart_addr'

Caused by commit 600715dcdf567c86f8b2c6173fcfb4b873e25a19 ("generic: add
phys_addr_t for holding physical addresses") from the tip-core tree.
This only fails if CONFIG_RELOCATABLE is set.

So include that instead of asm/types.h in asm/page.h for
the CONFIG_RELOCATABLE case.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: ppc-dev <linuxppc-dev@ozlabs.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/include/asm/page.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e088545cb3f5..94fe5138b30f 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -10,9 +10,13 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#else
+#include <asm/types.h>
+#endif
 #include <asm/asm-compat.h>
 #include <asm/kdump.h>
-#include <asm/types.h>
 
 /*
  * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
-- 
cgit v1.2.3-55-g7522


From 769415c61191bc860f60c6edc3cb7cba24fb3218 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi
Date: Thu, 16 Oct 2008 16:08:56 +0200
Subject: fuse: fix SEEK_END incorrectness

Update file size before using it in lseek(..., SEEK_END).

Reported-by: Amnon Shiloh <u3557@miso.sublimeip.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..98079aa800e8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1448,6 +1448,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	switch (origin) {
 	case SEEK_END:
+		retval = fuse_update_attributes(inode, NULL, file, NULL);
+		if (retval)
+			return retval;
 		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
-- 
cgit v1.2.3-55-g7522


From 17e18ab6ff6ec44e95514c7346d2cbd0363ef640 Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Thu, 16 Oct 2008 16:08:56 +0200
Subject: fuse: add missing fuse_request_free

The error handling code for the second call to fuse_request_alloc should
include freeing the result of the first one.

This bug was found by the Coccinelle project:

  http://www.emn.fr/x-info/coccinelle/

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacff..54b1f0e1ef58 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (is_bdev) {
 		fc->destroy_req = fuse_request_alloc();
 		if (!fc->destroy_req)
-			goto err_put_root;
+			goto err_free_init_req;
 	}
 
 	mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err_unlock:
 	mutex_unlock(&fuse_mutex);
+ err_free_init_req:
 	fuse_request_free(init_req);
  err_put_root:
 	dput(root_dentry);
-- 
cgit v1.2.3-55-g7522


From 37194d0723b9b68b4b299b2564ca99e3d0a094c3 Mon Sep 17 00:00:00 2001
From: Robert P. J. Day
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: fuse: config description improvement

Make the short description of the FUSE_FS config option clearer.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 9e9d70c02a07..9c43045ebbf9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -665,7 +665,7 @@ config AUTOFS4_FS
 	  N here.
 
 config FUSE_FS
-	tristate "Filesystem in Userspace support"
+	tristate "FUSE (Filesystem in Userspace) support"
 	help
 	  With FUSE it is possible to implement a fully functional filesystem
 	  in a userspace program.
-- 
cgit v1.2.3-55-g7522


From 29d434b39c807320fbe4bcdce0ab98a0b9fcb285 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: fuse: add include protectors

Add include protectors to include/linux/fuse.h and fs/fuse/fuse_i.h.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/fuse_i.h     | 5 +++++
 include/linux/fuse.h | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd1..35accfdd747f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
   See the file COPYING.
 */
 
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
 #include <linux/fuse.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 265635dc9908..8bc1101e9b35 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -19,6 +19,9 @@
  *  - add file flags field to fuse_read_in and fuse_write_in
  */
 
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
 #include <asm/types.h>
 #include <linux/major.h>
 
@@ -409,3 +412,5 @@ struct fuse_dirent {
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+#endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3-55-g7522


From a7c1b990f71574e077b94ce4582e2cf11cb891fe Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 16 Oct 2008 16:08:57 +0200
Subject: fuse: implement nonseekable open

Let the client request nonseekable open using FOPEN_NONSEEKABLE and
call nonseekable_open() on the file if requested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c       | 2 ++
 include/linux/fuse.h | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 98079aa800e8..34930a964b82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
 		file->f_op = &fuse_direct_io_file_operations;
 	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
+	if (outarg->open_flags & FOPEN_NONSEEKABLE)
+		nonseekable_open(inode, file);
 	ff->fh = outarg->fh;
 	file->private_data = fuse_file_get(ff);
 }
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8bc1101e9b35..350fe9767bbc 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -17,6 +17,9 @@
  *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
  *  - add blksize field to fuse_attr
  *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -29,7 +32,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 9
+#define FUSE_KERNEL_MINOR_VERSION 10
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -101,9 +104,11 @@ struct fuse_file_lock {
  *
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
+#define FOPEN_NONSEEKABLE	(1 << 2)
 
 /**
  * INIT request/reply flags
-- 
cgit v1.2.3-55-g7522


From 25db8ad5c56700e7716fe23426b16c5e3b1674b4 Mon Sep 17 00:00:00 2001
From: Alan Cox
Date: Tue, 19 Aug 2008 20:49:40 -0700
Subject: serial, 8250: remove NR_IRQ usage

Works on my test box with a quick test.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/serial/68328serial.c | 11 ++------
 drivers/serial/8250.c        | 67 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 381b12ac20e0..d935b2d04f93 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -66,7 +66,6 @@
 #endif
 
 static struct m68k_serial m68k_soft[NR_PORTS];
-struct m68k_serial *IRQ_ports[NR_IRQS];
 
 static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS;
 
@@ -375,15 +374,11 @@ clear_and_return:
  */
 irqreturn_t rs_interrupt(int irq, void *dev_id)
 {
-	struct m68k_serial * info;
+	struct m68k_serial *info = dev_id;
 	m68328_uart *uart;
 	unsigned short rx;
 	unsigned short tx;
 
-	info = IRQ_ports[irq];
-	if(!info)
-	    return IRQ_NONE;
-
 	uart = &uart_addr[info->line];
 	rx = uart->urx.w;
 
@@ -1383,8 +1378,6 @@ rs68328_init(void)
 		   info->port, info->irq);
 	    printk(" is a builtin MC68328 UART\n");
 	    
-	    IRQ_ports[info->irq] = info;	/* waste of space */
-
 #ifdef CONFIG_M68VZ328
 		if (i > 0 )
 			PJSEL &= 0xCF;  /* PSW enable second port output */
@@ -1393,7 +1386,7 @@ rs68328_init(void)
 	    if (request_irq(uart_irqs[i],
 			    rs_interrupt,
 			    IRQF_DISABLED,
-			    "M68328_UART", NULL))
+			    "M68328_UART", info))
                 panic("Unable to attach 68328 serial interrupt\n");
 	}
 	local_irq_restore(flags);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index d3ca7d32abe0..c66bb44c4747 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -156,11 +156,15 @@ struct uart_8250_port {
 };
 
 struct irq_info {
-	spinlock_t		lock;
+	struct			hlist_node node;
+	int			irq;
+	spinlock_t		lock;	/* Protects list not the hash */
 	struct list_head	*head;
 };
 
-static struct irq_info irq_lists[NR_IRQS];
+#define NR_IRQ_HASH		32	/* Can be adjusted later */
+static struct hlist_head irq_lists[NR_IRQ_HASH];
+static DEFINE_MUTEX(hash_mutex);	/* Used to walk the hash */
 
 /*
  * Here we define the default xmit fifo size used for each type of UART.
@@ -1545,15 +1549,43 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
 		BUG_ON(i->head != &up->list);
 		i->head = NULL;
 	}
-
 	spin_unlock_irq(&i->lock);
+	/* List empty so throw away the hash node */
+	if (i->head == NULL) {
+		hlist_del(&i->node);
+		kfree(i);
+	}
 }
 
 static int serial_link_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct hlist_head *h;
+	struct hlist_node *n;
+	struct irq_info *i;
 	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	if (n == NULL) {
+		i = kzalloc(sizeof(struct irq_info), GFP_KERNEL);
+		if (i == NULL) {
+			mutex_unlock(&hash_mutex);
+			return -ENOMEM;
+		}
+		spin_lock_init(&i->lock);
+		i->irq = up->port.irq;
+		hlist_add_head(&i->node, h);
+	}
+	mutex_unlock(&hash_mutex);
+
 	spin_lock_irq(&i->lock);
 
 	if (i->head) {
@@ -1577,14 +1609,28 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
 
 static void serial_unlink_irq_chain(struct uart_8250_port *up)
 {
-	struct irq_info *i = irq_lists + up->port.irq;
+	struct irq_info *i;
+	struct hlist_node *n;
+	struct hlist_head *h;
 
+	mutex_lock(&hash_mutex);
+
+	h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+	hlist_for_each(n, h) {
+		i = hlist_entry(n, struct irq_info, node);
+		if (i->irq == up->port.irq)
+			break;
+	}
+
+	BUG_ON(n == NULL);
 	BUG_ON(i->head == NULL);
 
 	if (list_empty(i->head))
 		free_irq(up->port.irq, i);
 
 	serial_do_unlink(i, up);
+	mutex_unlock(&hash_mutex);
 }
 
 /* Base timer interval for polling */
@@ -2960,7 +3006,7 @@ EXPORT_SYMBOL(serial8250_unregister_port);
 
 static int __init serial8250_init(void)
 {
-	int ret, i;
+	int ret;
 
 	if (nr_uarts > UART_NR)
 		nr_uarts = UART_NR;
@@ -2969,9 +3015,6 @@ static int __init serial8250_init(void)
 		"%d ports, IRQ sharing %sabled\n", nr_uarts,
 		share_irqs ? "en" : "dis");
 
-	for (i = 0; i < NR_IRQS; i++)
-		spin_lock_init(&irq_lists[i].lock);
-
 #ifdef CONFIG_SPARC
 	ret = sunserial_register_minors(&serial8250_reg, UART_NR);
 #else
@@ -2999,15 +3042,15 @@ static int __init serial8250_init(void)
 		goto out;
 
 	platform_device_del(serial8250_isa_devs);
- put_dev:
+put_dev:
 	platform_device_put(serial8250_isa_devs);
- unreg_uart_drv:
+unreg_uart_drv:
 #ifdef CONFIG_SPARC
 	sunserial_unregister_minors(&serial8250_reg, UART_NR);
 #else
 	uart_unregister_driver(&serial8250_reg);
 #endif
- out:
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From fe648be019119722ec0ac54e3a4b2e5bf5168589 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:41 -0700
Subject: x86: add after_bootmem flag for 32bit

to prepare to use dyn_array support etc.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..91343c2694b4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -66,6 +66,7 @@ static unsigned long __meminitdata table_end;
 static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
+int after_bootmem;
 
 static __init void *alloc_low_page(unsigned long *phys)
 {
@@ -988,6 +989,8 @@ void __init mem_init(void)
 
 	set_highmem_pages_init();
 
+	after_bootmem = 1;
+
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-- 
cgit v1.2.3-55-g7522


From 38395738f581ce9417402f28774a8c4650264a00 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:42 -0700
Subject: x86: remove irq_vectors_limits

there's no user left.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-generic/irq_vectors_limits.h | 14 --------------
 include/asm-x86/summit/irq_vectors_limits.h       | 14 --------------
 2 files changed, 28 deletions(-)
 delete mode 100644 include/asm-x86/mach-generic/irq_vectors_limits.h
 delete mode 100644 include/asm-x86/summit/irq_vectors_limits.h

diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
deleted file mode 100644
index f7870e1a220d..000000000000
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
deleted file mode 100644
index 890ce3f5e09a..000000000000
--- a/include/asm-x86/summit/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
-- 
cgit v1.2.3-55-g7522


From 3ddfda11861d305b02ed810b522dcf48b74ca808 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:43 -0700
Subject: generic: add dyn_array support

Allow crazy big arrays via bootmem at init stage.
Architectures use CONFIG_HAVE_DYN_ARRAY to enable it.

usage:

| static struct irq_desc irq_desc_init __initdata = {
|        .status = IRQ_DISABLED,
|        .chip = &no_irq_chip,
|        .handle_irq = handle_bad_irq,
|        .depth = 1,
|        .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
| #ifdef CONFIG_SMP
|        .affinity = CPU_MASK_ALL
| #endif
| };
|
| static void __init init_work(void *data)
| {
|        struct dyn_array *da = data;
|        struct  irq_desc *desc;
|        int i;
|
|        desc = *da->name;
|
|        for (i = 0; i < *da->nr; i++)
|                memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
| }
|
| struct irq_desc *irq_desc;
| DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);

after pre_alloc_dyn_array() after setup_arch(), the array is ready to be
used.

Via this facility we can replace irq_desc[NR_IRQS] array with dyn_array
irq_desc[nr_irqs].

v2: remove _nopanic in pre_alloc_dyn_array()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |  7 +++++++
 include/linux/init.h              | 23 +++++++++++++++++++++++
 init/main.c                       | 24 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7440a0dceddb..7881406c03ec 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -210,6 +210,13 @@
  * All archs are supposed to use RO_DATA() */
 #define RODATA RO_DATA(4096)
 
+#define DYN_ARRAY_INIT(align)							\
+	. = ALIGN((align));						\
+	.dyn_array.init : AT(ADDR(.dyn_array.init) - LOAD_OFFSET) {	\
+		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
+		*(.dyn_array.init)					\
+		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
+	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
diff --git a/include/linux/init.h b/include/linux/init.h
index 93538b696e3d..cf9fa7f174af 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -246,6 +246,29 @@ struct obs_kernel_param {
 
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
+
+struct dyn_array {
+	void **name;
+	unsigned long size;
+	unsigned int *nr;
+	unsigned long align;
+	void (*init_work)(void *);
+};
+extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
+
+#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+		static struct dyn_array __dyn_array_##nameX __initdata = \
+		{	.name = (void **)&nameX,\
+			.size = sizeX,\
+			.nr   = &nrX,\
+			.align = alignX,\
+			.init_work = init_workX,\
+		}; \
+		static struct dyn_array *__dyn_array_ptr_##nameX __used \
+		__attribute__((__section__(".dyn_array.init"))) = \
+			&__dyn_array_##nameX
+
+extern void pre_alloc_dyn_array(void);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/init/main.c b/init/main.c
index 27f6bf6108e9..638d3a786412 100644
--- a/init/main.c
+++ b/init/main.c
@@ -536,6 +536,29 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
+void pre_alloc_dyn_array(void)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys = 0;
+	struct dyn_array **daa;
+
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+			da->size, *da->nr, da->align);
+		*da->name = __alloc_bootmem(size, da->align, phys);
+		phys = virt_to_phys(*da->name);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		if (da->init_work)
+			da->init_work(da);
+	}
+#endif
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -567,6 +590,7 @@ asmlinkage void __init start_kernel(void)
 	printk(KERN_NOTICE);
 	printk(linux_banner);
 	setup_arch(&command_line);
+	pre_alloc_dyn_array();
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
 	unwind_setup();
-- 
cgit v1.2.3-55-g7522


From 1f3fcd4b1adc972d5c6a34cfed98931c46575b49 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:44 -0700
Subject: add per_cpu_dyn_array support

allow dyn-array in per_cpu area, allocated dynamically.

usage:

|  /* in .h */
| struct kernel_stat {
|        struct cpu_usage_stat   cpustat;
|        unsigned int *irqs;
| };
|
|  /* in .c */
| DEFINE_PER_CPU(struct kernel_stat, kstat);
|
| DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);

after setup_percpu()/per_cpu_alloc_dyn_array(), the dyn_array in
per_cpu area is ready to use.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c    |  7 +++--
 include/asm-generic/vmlinux.lds.h |  6 ++++
 include/linux/init.h              | 27 +++++++++++++++--
 init/main.c                       | 63 +++++++++++++++++++++++++++++++++++++--
 4 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..13ba7a83808d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size = PERCPU_ENOUGH_ROOM;
+	ssize_t size, old_size;
 	char *ptr;
 	int cpu;
 
@@ -148,7 +148,8 @@ void __init setup_per_cpu_areas(void)
 	setup_cpu_pda_map();
 
 	/* Copy section for each CPU (we discard the original) */
-	size = PERCPU_ENOUGH_ROOM;
+	old_size = PERCPU_ENOUGH_ROOM;
+	size = old_size + per_cpu_dyn_array_size();
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -176,6 +177,8 @@ void __init setup_per_cpu_areas(void)
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
+		per_cpu_alloc_dyn_array(cpu, ptr + old_size);
+
 	}
 
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7881406c03ec..c68eda9d9a90 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -216,6 +216,12 @@
 		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
 		*(.dyn_array.init)					\
 		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
+	}								\
+	. = ALIGN((align));						\
+	.per_cpu_dyn_array.init : AT(ADDR(.per_cpu_dyn_array.init) - LOAD_OFFSET) {	\
+		VMLINUX_SYMBOL(__per_cpu_dyn_array_start) = .;		\
+		*(.per_cpu_dyn_array.init)				\
+		VMLINUX_SYMBOL(__per_cpu_dyn_array_end) = .;		\
 	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
diff --git a/include/linux/init.h b/include/linux/init.h
index cf9fa7f174af..332806826b8e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -255,12 +255,13 @@ struct dyn_array {
 	void (*init_work)(void *);
 };
 extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
+extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[];
 
-#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
 		static struct dyn_array __dyn_array_##nameX __initdata = \
-		{	.name = (void **)&nameX,\
+		{	.name = (void **)&(nameX),\
 			.size = sizeX,\
-			.nr   = &nrX,\
+			.nr   = &(nrX),\
 			.align = alignX,\
 			.init_work = init_workX,\
 		}; \
@@ -268,7 +269,27 @@ extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
 		__attribute__((__section__(".dyn_array.init"))) = \
 			&__dyn_array_##nameX
 
+#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+	DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX)
+
+#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
+		static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \
+		{	.name = (void **)&(addrX),\
+			.size = sizeX,\
+			.nr   = &(nrX),\
+			.align = alignX,\
+			.init_work = init_workX,\
+		}; \
+		static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \
+		__attribute__((__section__(".per_cpu_dyn_array.init"))) = \
+			&__per_cpu_dyn_array_##nameX
+
+#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
+	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
+
 extern void pre_alloc_dyn_array(void);
+extern unsigned long per_cpu_dyn_array_size(void);
+extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/init/main.c b/init/main.c
index 638d3a786412..416bca4f734f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -391,17 +391,19 @@ EXPORT_SYMBOL(__per_cpu_offset);
 
 static void __init setup_per_cpu_areas(void)
 {
-	unsigned long size, i;
+	unsigned long size, i, old_size;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
 
 	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+	old_size = PERCPU_ENOUGH_ROOM;
+	size = ALIGN(old_size + per_cpu_dyn_array_size(), PAGE_SIZE);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+		per_cpu_alloc_dyn_array(i, ptr + old_size);
 		ptr += size;
 	}
 }
@@ -559,6 +561,63 @@ void pre_alloc_dyn_array(void)
 #endif
 }
 
+unsigned long per_cpu_dyn_array_size(void)
+{
+	unsigned long total_size = 0;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size;
+	struct dyn_array **daa;
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+	}
+	if (total_size)
+		printk(KERN_DEBUG "per_cpu_dyna_array total_size: %#lx\n",
+			 total_size);
+#endif
+	return total_size;
+}
+
+void per_cpu_alloc_dyn_array(int cpu, char *ptr)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys;
+	struct dyn_array **daa;
+	unsigned long addr;
+	void **array;
+
+	phys = virt_to_phys(ptr);
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+			da->size, *da->nr, da->align);
+
+		phys = roundup(phys, da->align);
+		addr = (unsigned long)da->name;
+		addr += per_cpu_offset(cpu);
+		array = (void **)addr;
+		*array = phys_to_virt(phys);
+		*da->name = *array; /* so init_work could use it directly */
+		printk(KERN_CONT " %p ==> [%#lx - %#lx]\n", array, phys, phys + size);
+		phys += size;
+
+		if (da->init_work) {
+			da->init_work(da);
+		}
+	}
+#endif
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
-- 
cgit v1.2.3-55-g7522


From 1f8ff037a871690c762d267d8a052529d3102fc9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:45 -0700
Subject: x86: alloc dyn_array all together

so could spare some memory with small alignment in bootmem

also tighten the alignment checking, and make print out less debug info.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c | 16 +++++++----
 include/linux/init.h           |  2 +-
 init/main.c                    | 65 ++++++++++++++++++++++++++++++++----------
 3 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 13ba7a83808d..2b7dab699e83 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,26 +140,31 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size, old_size;
+	ssize_t size, old_size, da_size;
 	char *ptr;
 	int cpu;
+	unsigned long align = 1;
 
 	/* Setup cpu_pda map */
 	setup_cpu_pda_map();
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	size = old_size + per_cpu_dyn_array_size();
+	da_size = per_cpu_dyn_array_size(&align);
+	align = max_t(unsigned long, PAGE_SIZE, align);
+	size = roundup(old_size + da_size, align);
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-		ptr = alloc_bootmem_pages(size);
+		ptr = __alloc_bootmem(size, align,
+				 __pa(MAX_DMA_ADDRESS));
 #else
 		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
-			ptr = alloc_bootmem_pages(size);
+			ptr = __alloc_bootmem(size, align,
+					 __pa(MAX_DMA_ADDRESS));
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
@@ -168,7 +173,8 @@ void __init setup_per_cpu_areas(void)
 					 cpu, __pa(ptr));
 		}
 		else {
-			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
+							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
 				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
 					 cpu, node, __pa(ptr));
diff --git a/include/linux/init.h b/include/linux/init.h
index 332806826b8e..59fbb4aaba6a 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -288,7 +288,7 @@ extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[]
 	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
 
 extern void pre_alloc_dyn_array(void);
-extern unsigned long per_cpu_dyn_array_size(void);
+extern unsigned long per_cpu_dyn_array_size(unsigned long *align);
 extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
diff --git a/init/main.c b/init/main.c
index 416bca4f734f..ab97d0877acc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -394,10 +394,14 @@ static void __init setup_per_cpu_areas(void)
 	unsigned long size, i, old_size;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
+	unsigned long align = 1;
+	unsigned da_size;
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	size = ALIGN(old_size + per_cpu_dyn_array_size(), PAGE_SIZE);
+	da_size = per_cpu_dyn_array_size(&align);
+	align = max_t(unsigned long, PAGE_SIZE, align);
+	size = ALIGN(old_size + da_size, align);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
@@ -541,45 +545,78 @@ void __init __weak thread_info_cache_init(void)
 void pre_alloc_dyn_array(void)
 {
 #ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys = 0;
+	unsigned long total_size = 0, size, phys;
+	unsigned long max_align = 1;
 	struct dyn_array **daa;
+	char *ptr;
 
+	/* get the total size at first */
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyna_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
 			da->size, *da->nr, da->align);
-		*da->name = __alloc_bootmem(size, da->align, phys);
-		phys = virt_to_phys(*da->name);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size)
+		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
+			 total_size);
+	else
+		return;
+
+	/* allocate them all together */
+	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
+	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
+	if (!ptr)
+		panic("Can not alloc dyn_alloc\n");
+
+	phys = virt_to_phys(ptr);
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		*da->name = phys_to_virt(phys);
 		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
+		phys += size;
+
 		if (da->init_work)
 			da->init_work(da);
 	}
 #endif
 }
 
-unsigned long per_cpu_dyn_array_size(void)
+unsigned long per_cpu_dyn_array_size(unsigned long *align)
 {
 	unsigned long total_size = 0;
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned long size;
 	struct dyn_array **daa;
+	unsigned max_align = 1;
 
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
 		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
 			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
 	}
-	if (total_size)
-		printk(KERN_DEBUG "per_cpu_dyna_array total_size: %#lx\n",
+	if (total_size) {
+		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
 			 total_size);
+		*align = max_align;
+	}
 #endif
 	return total_size;
 }
@@ -593,14 +630,11 @@ void per_cpu_alloc_dyn_array(int cpu, char *ptr)
 	void **array;
 
 	phys = virt_to_phys(ptr);
-
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyna_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx",
-			da->size, *da->nr, da->align);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
 
 		phys = roundup(phys, da->align);
 		addr = (unsigned long)da->name;
@@ -608,7 +642,8 @@ void per_cpu_alloc_dyn_array(int cpu, char *ptr)
 		array = (void **)addr;
 		*array = phys_to_virt(phys);
 		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " %p ==> [%#lx - %#lx]\n", array, phys, phys + size);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
 		phys += size;
 
 		if (da->init_work) {
-- 
cgit v1.2.3-55-g7522


From 6da55c3e8da88e8a7cb6452160776ad6706798ad Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:46 -0700
Subject: x86: enable dyn_array support

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                     | 2 ++
 arch/x86/Kconfig                 | 1 +
 arch/x86/kernel/vmlinux_32.lds.S | 1 +
 arch/x86/kernel/vmlinux_64.lds.S | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 0267babe5eb9..c1f9febb404f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,3 +103,5 @@ config HAVE_CLK
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
 
+config HAVE_DYN_ARRAY
+	def_bool n
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f65c2744d573..42f98009d752 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_DYN_ARRAY
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index a9b8560adbc2..c36007ab3940 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -145,6 +145,7 @@ SECTIONS
 	*(.x86_cpu_dev.init)
 	__x86_cpu_dev_end = .;
   }
+  DYN_ARRAY_INIT(8)
   SECURITY_INIT
   . = ALIGN(4);
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 46e05447405b..30973dbac8c2 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -173,6 +173,9 @@ SECTIONS
 	*(.x86_cpu_dev.init)
   }
   __x86_cpu_dev_end = .;
+
+  DYN_ARRAY_INIT(8)
+
   SECURITY_INIT
 
   . = ALIGN(8);
-- 
cgit v1.2.3-55-g7522


From 85c0f90978bf50596dbd23854648020f1f9b5bfd Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:47 -0700
Subject: irq: introduce nr_irqs

at this point nr_irqs is equal NR_IRQS

convert a few easy users from NR_IRQS to dynamic nr_irqs.

v2: according to Eric, we need to take care of arch without generic_hardirqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m68k/kernel/ints.c   |  2 ++
 arch/s390/kernel/irq.c    |  2 ++
 arch/sparc/kernel/irq.c   |  3 +++
 include/linux/interrupt.h |  2 ++
 kernel/irq/autoprobe.c    | 10 +++++-----
 kernel/irq/chip.c         | 20 ++++++++++----------
 kernel/irq/handle.c       |  3 ++-
 kernel/irq/manage.c       | 16 ++++++++--------
 kernel/irq/proc.c         |  2 +-
 kernel/irq/resend.c       |  4 ++--
 kernel/irq/spurious.c     |  4 ++--
 11 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 7e8a0d394e61..74453d15692e 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -46,6 +46,8 @@
 #include <asm/q40ints.h>
 #endif
 
+int nr_irqs = NR_IRQS;
+
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
 extern u16 user_irqvec_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e7c5bfb7c755..14eb5496c8a8 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -17,6 +17,8 @@
 #include <linux/proc_fs.h>
 #include <linux/profile.h>
 
+int nr_irqs = NR_IRQS;
+
 /*
  * show_interrupts is needed by /proc/interrupts.
  */
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 93e1d1c65290..059598b7e0f0 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -55,6 +55,9 @@
 #define SMP_NOP2
 #define SMP_NOP3
 #endif /* SMP */
+
+int nr_irqs = NR_IRQS;
+
 unsigned long __raw_local_irq_save(void)
 {
 	unsigned long retval;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 58ff4e74b2f3..511803853a5b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,6 +15,8 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+extern int nr_irqs;
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 533068cfb607..c689e9851a80 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -38,7 +38,7 @@ unsigned long probe_irq_on(void)
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
+	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_desc + i;
 
 		spin_lock_irq(&desc->lock);
@@ -68,7 +68,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
+	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_desc + i;
 
 		spin_lock_irq(&desc->lock);
@@ -89,7 +89,7 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		unsigned int status;
 
 		desc = irq_desc + i;
@@ -130,7 +130,7 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
@@ -173,7 +173,7 @@ int probe_irq_off(unsigned long val)
 {
 	int i, irq_found = 0, nr_irqs = 0;
 
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 5203a599d211..bba66e098703 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,7 +27,7 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
@@ -60,7 +60,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
@@ -92,7 +92,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -121,7 +121,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
@@ -149,7 +149,7 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
@@ -175,7 +175,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
@@ -201,7 +201,7 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS || !desc->chip) {
+	if (irq >= nr_irqs || !desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -545,7 +545,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
@@ -610,7 +610,7 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
 		return;
@@ -628,7 +628,7 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (irq >= nr_irqs) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
 		return;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f4c8a03a9fbb..e9d022cf593e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -47,6 +47,7 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  *
  * Controller mappings for all interrupt sources:
  */
+int nr_irqs = NR_IRQS;
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -265,7 +266,7 @@ void early_init_irq_lock_class(void)
 {
 	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
 }
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d363f32dba7d..d5a4333d8f1f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -34,7 +34,7 @@ void synchronize_irq(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned int status;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	do {
@@ -143,7 +143,7 @@ void disable_irq_nosync(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -171,7 +171,7 @@ void disable_irq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_desc + irq;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	disable_irq_nosync(irq);
@@ -214,7 +214,7 @@ void enable_irq(unsigned int irq)
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -290,7 +290,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
 	struct irqaction *action;
 
-	if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
+	if (irq >= nr_irqs || irq_desc[irq].status & IRQ_NOREQUEST)
 		return 0;
 
 	action = irq_desc[irq].action;
@@ -356,7 +356,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	int shared = 0;
 	int ret;
 
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return -EINVAL;
 
 	if (desc->chip == &no_irq_chip)
@@ -515,7 +515,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return;
 
 	desc = irq_desc + irq;
@@ -630,7 +630,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	 */
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
-	if (irq >= NR_IRQS)
+	if (irq >= nr_irqs)
 		return -EINVAL;
 	if (irq_desc[irq].status & IRQ_NOREQUEST)
 		return -EINVAL;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a09dd29c2fd7..e5225a65a4f9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -240,7 +240,7 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		register_irq_proc(i);
 }
 
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index a8046791ba2d..cba8aa5bc7f4 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -33,8 +33,8 @@ static void resend_irqs(unsigned long arg)
 	struct irq_desc *desc;
 	int irq;
 
-	while (!bitmap_empty(irqs_resend, NR_IRQS)) {
-		irq = find_first_bit(irqs_resend, NR_IRQS);
+	while (!bitmap_empty(irqs_resend, nr_irqs)) {
+		irq = find_first_bit(irqs_resend, nr_irqs);
 		clear_bit(irq, irqs_resend);
 		desc = irq_desc + irq;
 		local_irq_disable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 19fe9d6ebfe8..e26ca1e90c08 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,7 +91,7 @@ static int misrouted_irq(int irq)
 	int i;
 	int ok = 0;
 
-	for (i = 1; i < NR_IRQS; i++) {
+	for (i = 1; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 
 		if (i == irq)	/* Already tried */
@@ -107,7 +107,7 @@ static int misrouted_irq(int irq)
 static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
-	for (i = 1; i < NR_IRQS; i++) {
+	for (i = 1; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_desc + i;
 		unsigned int status;
 
-- 
cgit v1.2.3-55-g7522


From 0799e432acfda879eaeef9622426bfa1434f3786 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:48 -0700
Subject: x86: use nr_irqs

also add first_free_entry and pin_map_size, which were NR_IRQS derived
constants.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 26 ++++++++++++++------------
 arch/x86/kernel/io_apic_64.c | 33 +++++++++++++++++----------------
 arch/x86/kernel/irq_32.c     |  8 ++++----
 arch/x86/kernel/irq_64.c     |  8 ++++----
 arch/x86/kernel/irqinit_32.c |  2 +-
 arch/x86/kernel/irqinit_64.c |  2 +-
 include/asm-x86/irq.h        |  3 +++
 7 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index e710289f673e..d382990244f0 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -70,6 +70,7 @@ int timer_through_8259 __initdata;
  */
 int sis_apic_bug = -1;
 
+int first_free_entry = NR_IRQS;
 /*
  * # of IRQ routing registers
  */
@@ -100,6 +101,8 @@ static int disable_timer_pin_1 __initdata;
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
+int pin_map_size = PIN_MAP_SIZE;
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -213,7 +216,6 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
 	while (entry->next)
@@ -222,7 +224,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	if (entry->pin != -1) {
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
+		if (++first_free_entry >= pin_map_size)
 			panic("io_apic.c: whoops");
 	}
 	entry->apic = apic;
@@ -457,7 +459,7 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 	int i, j;
 
 	for_each_online_cpu(i) {
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			if (!irq_desc[j].action)
 				continue;
 			/* Is it a significant load ?  */
@@ -492,7 +494,7 @@ static void do_irq_balance(void)
 		if (!cpu_online(i))
 			continue;
 		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			unsigned long value_now, delta;
 			/* Is this an active IRQ or balancing disabled ? */
 			if (!irq_desc[j].action || irq_balancing_disabled(j))
@@ -587,7 +589,7 @@ tryanotherirq:
 	 */
 	move_this_load = 0;
 	selected_irq = -1;
-	for (j = 0; j < NR_IRQS; j++) {
+	for (j = 0; j < nr_irqs; j++) {
 		/* Is this an active IRQ? */
 		if (!irq_desc[j].action)
 			continue;
@@ -664,7 +666,7 @@ static int balanced_irq(void *unused)
 	long time_remaining = balanced_irq_interval;
 
 	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < NR_IRQS ; i++) {
+	for (i = 0 ; i < nr_irqs ; i++) {
 		irq_desc[i].pending_mask = cpumask_of_cpu(0);
 		set_pending_irq(i, cpumask_of_cpu(0));
 	}
@@ -712,8 +714,8 @@ static int __init balanced_irq_init(void)
 		physical_balance = 1;
 
 	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
+		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
 		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
 			printk(KERN_ERR "balanced_irq_init: out of memory");
 			goto failed;
@@ -1441,7 +1443,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_pin_list *entry = irq_2_pin + i;
 		if (entry->pin < 0)
 			continue;
@@ -1621,7 +1623,7 @@ static void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
+	for (i = 0; i < pin_map_size; i++) {
 		irq_2_pin[i].pin = -1;
 		irq_2_pin[i].next = 0;
 	}
@@ -2005,7 +2007,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
+	for (irq = 0; irq < nr_irqs ; irq++) {
 		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2449,7 +2451,7 @@ int create_irq(void)
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		if (irq_vector[new] != 0)
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 02063ae042f7..448384c7c1e8 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -132,6 +132,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
+int pin_map_size = PIN_MAP_SIZE;
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -224,7 +225,7 @@ static inline void io_apic_sync(unsigned int apic)
 	int pin;							\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 									\
-	BUG_ON(irq >= NR_IRQS);						\
+	BUG_ON(irq >= nr_irqs);						\
 	for (;;) {							\
 		unsigned int reg;					\
 		pin = entry->pin;					\
@@ -301,7 +302,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	int apic, pin;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	for (;;) {
 		unsigned int reg;
 		apic = entry->apic;
@@ -358,19 +359,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
+int first_free_entry = NR_IRQS;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
 	if (entry->pin != -1) {
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= PIN_MAP_SIZE)
+		if (++first_free_entry >= pin_map_size)
 			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
@@ -634,7 +635,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
-	BUG_ON(best_guess >= NR_IRQS);
+	BUG_ON(best_guess >= nr_irqs);
 	return best_guess;
 }
 
@@ -766,7 +767,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
 	}
-	BUG_ON(irq >= NR_IRQS);
+	BUG_ON(irq >= nr_irqs);
 	return irq;
 }
 
@@ -801,7 +802,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	int cpu;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = &irq_cfg[irq];
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -875,7 +876,7 @@ static void __clear_irq_vector(int irq)
 	cpumask_t mask;
 	int cpu, vector;
 
-	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = &irq_cfg[irq];
 	BUG_ON(!cfg->vector);
 
@@ -895,7 +896,7 @@ void __setup_vector_irq(int cpu)
 	int irq, vector;
 
 	/* Mark the inuse vectors */
-	for (irq = 0; irq < NR_IRQS; ++irq) {
+	for (irq = 0; irq < nr_irqs; ++irq) {
 		if (!cpu_isset(cpu, irq_cfg[irq].domain))
 			continue;
 		vector = irq_cfg[irq].vector;
@@ -1193,7 +1194,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		struct irq_pin_list *entry = irq_2_pin + i;
 		if (entry->pin < 0)
 			continue;
@@ -1366,7 +1367,7 @@ void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < PIN_MAP_SIZE; i++) {
+	for (i = 0; i < pin_map_size; i++) {
 		irq_2_pin[i].pin = -1;
 		irq_2_pin[i].next = 0;
 	}
@@ -1658,7 +1659,7 @@ static void ir_irq_migration(struct work_struct *work)
 {
 	int irq;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		struct irq_desc *desc = irq_desc + irq;
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
@@ -1707,7 +1708,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
-		if (irq >= NR_IRQS)
+		if (irq >= nr_irqs)
 			continue;
 
 		desc = irq_desc + irq;
@@ -1865,7 +1866,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < NR_IRQS ; irq++) {
+	for (irq = 0; irq < nr_irqs ; irq++) {
 		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
@@ -2279,7 +2280,7 @@ int create_irq(void)
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (NR_IRQS - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		if (irq_cfg[new].vector != 0)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..4c7ffb32854c 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -226,7 +226,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	int overflow, irq = ~regs->orig_ax;
 	struct irq_desc *desc = irq_desc + irq;
 
-	if (unlikely((unsigned)irq >= NR_IRQS)) {
+	if (unlikely((unsigned)irq >= nr_irqs)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
 					__func__, irq);
 		BUG();
@@ -271,7 +271,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < NR_IRQS) {
+	if (i < nr_irqs) {
 		unsigned any_count = 0;
 
 		spin_lock_irqsave(&irq_desc[i].lock, flags);
@@ -303,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", nmi_count(j));
@@ -396,7 +396,7 @@ void fixup_irqs(cpumask_t map)
 	unsigned int irq;
 	static int warned;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
 		if (irq == 2)
 			continue;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..e1f0839430d2 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -81,7 +81,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < NR_IRQS) {
+	if (i < nr_irqs) {
 		unsigned any_count = 0;
 
 		spin_lock_irqsave(&irq_desc[i].lock, flags);
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
@@ -201,7 +201,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(irq < NR_IRQS))
+	if (likely(irq < nr_irqs))
 		generic_handle_irq(irq);
 	else {
 		if (!disable_apic)
@@ -224,7 +224,7 @@ void fixup_irqs(cpumask_t map)
 	unsigned int irq;
 	static int warned;
 
-	for (irq = 0; irq < NR_IRQS; irq++) {
+	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..65c1c9507707 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -100,7 +100,7 @@ void __init native_init_IRQ(void)
 	 */
 	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (i >= NR_IRQS)
+		if (i >= nr_irqs)
 			break;
 		/* SYSCALL_VECTOR was reserved in trap_init. */
 		if (!test_bit(vector, used_vectors))
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..165c5d9b0d1a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,7 +142,7 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < NR_IRQS; i++) {
+	for (i = 0; i < nr_irqs; i++) {
 		irq_desc[i].status = IRQ_DISABLED;
 		irq_desc[i].action = NULL;
 		irq_desc[i].depth = 1;
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 1e5f2909c1db..2a130c44f5de 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -10,6 +10,9 @@
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
 
+extern int pin_map_size;
+extern int first_free_entry;
+
 static inline int irq_canonicalize(int irq)
 {
 	return ((irq == 2) ? 9 : irq);
-- 
cgit v1.2.3-55-g7522


From 1f45f5621df82033cb4964d03530ade2f9a25e7b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:49 -0700
Subject: drivers/char: use nr_irqs

convert them to nr_irqs.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/hpet.c       | 2 +-
 drivers/char/random.c     | 4 ++--
 drivers/char/vr41xx_giu.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index f3cfb4c76125..408f5f92cb4e 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -219,7 +219,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 	for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
 		irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
 
-		if (irq >= NR_IRQS) {
+		if (irq >= nr_irqs) {
 			irq = HPET_MAX_IRQ;
 			break;
 		}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 6af435b89867..31456472829e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-	if (irq >= NR_IRQS || irq_timer_state[irq] == NULL)
+	if (irq >= nr_irqs || irq_timer_state[irq] == NULL)
 		return;
 
 	DEBUG_ENT("irq event %d\n", irq);
@@ -912,7 +912,7 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-	if (irq >= NR_IRQS || irq_timer_state[irq])
+	if (irq >= nr_irqs || irq_timer_state[irq])
 		return;
 
 	/*
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index ffe9b4e3072e..54c837288d19 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -641,7 +641,7 @@ static int __devinit giu_probe(struct platform_device *dev)
 	}
 
 	irq = platform_get_irq(dev, 0);
-	if (irq < 0 || irq >= NR_IRQS)
+	if (irq < 0 || irq >= nr_irqs)
 		return -EBUSY;
 
 	return cascade_irq(irq, giu_get_irq);
-- 
cgit v1.2.3-55-g7522


From 60e4ad7a72fd7ce562cdf8dd850289e1e76bc1b1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:50 -0700
Subject: drivers/net: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/net/3c59x.c                   | 4 ++--
 drivers/net/hamradio/baycom_ser_fdx.c | 4 ++--
 drivers/net/hamradio/scc.c            | 6 +++---
 drivers/net/wan/sbni.c                | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 491ee16da5c1..9ba295d9dd97 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -90,7 +90,7 @@ static int vortex_debug = 1;
 #include <linux/eisa.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
-#include <asm/irq.h>			/* For NR_IRQS only. */
+#include <asm/irq.h>			/* For nr_irqs only. */
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -1221,7 +1221,7 @@ static int __devinit vortex_probe1(struct device *gendev,
 	if (print_info)
 		printk(", IRQ %d\n", dev->irq);
 	/* Tell them about an invalid IRQ. */
-	if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+	if (dev->irq <= 0 || dev->irq >= nr_irqs)
 		printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
 			   dev->irq);
 
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 17ac6975d70d..b6a816e60c0f 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -416,10 +416,10 @@ static int ser12_open(struct net_device *dev)
 	if (!dev || !bc)
 		return -ENXIO;
 	if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
-	    dev->irq < 2 || dev->irq > NR_IRQS) {
+	    dev->irq < 2 || dev->irq > nr_irqs) {
 		printk(KERN_INFO "baycom_ser_fdx: invalid portnumber (max %u) "
 				"or irq (2 <= irq <= %d)\n",
-				0xffff-SER12_EXTENT, NR_IRQS);
+				0xffff-SER12_EXTENT, nr_irqs);
 		return -ENXIO;
 	}
 	if (bc->baud < 300 || bc->baud > 4800) {
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 45ae9d1191d7..c17e39bc5460 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1465,7 +1465,7 @@ static void z8530_init(void)
 	printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
 	
 	flag=" ";
-	for (k = 0; k < NR_IRQS; k++)
+	for (k = 0; k < nr_irqs; k++)
 		if (Ivec[k].used) 
 		{
 			printk("%s%d", flag, k);
@@ -1728,7 +1728,7 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 			if (hwcfg.irq == 2) hwcfg.irq = 9;
 
-			if (hwcfg.irq < 0 || hwcfg.irq >= NR_IRQS)
+			if (hwcfg.irq < 0 || hwcfg.irq >= nr_irqs)
 				return -EINVAL;
 				
 			if (!Ivec[hwcfg.irq].used && hwcfg.irq)
@@ -2148,7 +2148,7 @@ static void __exit scc_cleanup_driver(void)
 		}
 		
 	/* To unload the port must be closed so no real IRQ pending */
-	for (k=0; k < NR_IRQS ; k++)
+	for (k = 0; k < nr_irqs ; k++)
 		if (Ivec[k].used) free_irq(k, NULL);
 		
 	local_irq_enable();
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index f972fef87c98..ee51b6a5e605 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -318,7 +318,7 @@ sbni_pci_probe( struct net_device  *dev )
 				continue;
 		}
 
-		if( pci_irq_line <= 0  ||  pci_irq_line >= NR_IRQS )
+		if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
 			printk( KERN_WARNING "  WARNING: The PCI BIOS assigned "
 				"this PCI card to IRQ %d, which is unlikely "
 				"to work!.\n"
-- 
cgit v1.2.3-55-g7522


From a06148c36d3163aee4d5d2cad4e87032d74eaa6d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:51 -0700
Subject: drivers/pci/ intr remapping: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index bb642cc5e18c..980566e3352b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -22,7 +22,7 @@ static DEFINE_SPINLOCK(irq_2_ir_lock);
 
 int irq_remapped(int irq)
 {
-	if (irq > NR_IRQS)
+	if (irq > nr_irqs)
 		return 0;
 
 	if (!irq_2_iommu[irq].iommu)
@@ -35,7 +35,7 @@ int get_irte(int irq, struct irte *entry)
 {
 	int index;
 
-	if (!entry || irq > NR_IRQS)
+	if (!entry || irq > nr_irqs)
 		return -1;
 
 	spin_lock(&irq_2_ir_lock);
@@ -126,7 +126,7 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 	int index;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -140,7 +140,7 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -158,7 +158,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -180,7 +180,7 @@ int modify_irte(int irq, struct irte *irte_modified)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -205,7 +205,7 @@ int flush_irte(int irq)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
@@ -248,7 +248,7 @@ int free_irte(int irq)
 	struct intel_iommu *iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
-- 
cgit v1.2.3-55-g7522


From 9130addad2fb2bbe1847f13c838438ff10a66d3a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:52 -0700
Subject: drivers/pcmcia: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pcmcia/at91_cf.c      | 2 +-
 drivers/pcmcia/vrc4171_card.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index a0ffb8ebfe00..9e1140f085fd 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -273,7 +273,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
 			goto fail0d;
 		cf->socket.pci_irq = board->irq_pin;
 	} else
-		cf->socket.pci_irq = NR_IRQS + 1;
+		cf->socket.pci_irq = nr_irqs + 1;
 
 	/* pcmcia layer only remaps "real" memory not iospace */
 	cf->socket.io_offset = (unsigned long)
diff --git a/drivers/pcmcia/vrc4171_card.c b/drivers/pcmcia/vrc4171_card.c
index eee2f1cb213c..b2c412419059 100644
--- a/drivers/pcmcia/vrc4171_card.c
+++ b/drivers/pcmcia/vrc4171_card.c
@@ -639,7 +639,7 @@ static int __devinit vrc4171_card_setup(char *options)
 		int irq;
 		options += 4;
 		irq = simple_strtoul(options, &options, 0);
-		if (irq >= 0 && irq < NR_IRQS)
+		if (irq >= 0 && irq < nr_irqs)
 			vrc4171_irq = irq;
 
 		if (*options != ',')
-- 
cgit v1.2.3-55-g7522


From c7576b5b339f08c7346591c71a330b08f8b9943f Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:53 -0700
Subject: drivers/rtc: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/rtc/rtc-vr41xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 884b635f028b..834dcc6d785f 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -360,7 +360,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 	spin_unlock_irq(&rtc_lock);
 
 	aie_irq = platform_get_irq(pdev, 0);
-	if (aie_irq < 0 || aie_irq >= NR_IRQS) {
+	if (aie_irq < 0 || aie_irq >= nr_irqs) {
 		retval = -EBUSY;
 		goto err_device_unregister;
 	}
@@ -371,7 +371,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
 		goto err_device_unregister;
 
 	pie_irq = platform_get_irq(pdev, 1);
-	if (pie_irq < 0 || pie_irq >= NR_IRQS)
+	if (pie_irq < 0 || pie_irq >= nr_irqs)
 		goto err_free_irq;
 
 	retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
-- 
cgit v1.2.3-55-g7522


From 171ac6ae94e31d0fcb5ae922efd4a77a7e48b4e5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:54 -0700
Subject: drivers/scsi: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/scsi/aha152x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index b5a868d85eb4..1e5478abd90e 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -337,7 +337,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 #else
 #define IRQ_MIN 9
 #if defined(__PPC)
-#define IRQ_MAX (NR_IRQS-1)
+#define IRQ_MAX (nr_irqs-1)
 #else
 #define IRQ_MAX 12
 #endif
-- 
cgit v1.2.3-55-g7522


From a62c41337356989387d15020dc0f0288aaacfa44 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:55 -0700
Subject: drivers/serial: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/serial/8250.c                   | 2 +-
 drivers/serial/amba-pl010.c             | 2 +-
 drivers/serial/amba-pl011.c             | 2 +-
 drivers/serial/cpm_uart/cpm_uart_core.c | 2 +-
 drivers/serial/m32r_sio.c               | 4 ++--
 drivers/serial/serial_core.c            | 2 +-
 drivers/serial/serial_lh7a40x.c         | 2 +-
 drivers/serial/sh-sci.c                 | 2 +-
 drivers/serial/ucc_uart.c               | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index c66bb44c4747..006617e2211d 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2486,7 +2486,7 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 static int
 serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS ||
 	    ser->type == PORT_STARTECH)
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index 90b56c2c31e2..71562689116f 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -512,7 +512,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 9d08f27208a1..b7180046f8db 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -572,7 +572,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
 	int ret = 0;
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index 25efca5a7a1f..e54574c49a38 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -623,7 +623,7 @@ static int cpm_uart_verify_port(struct uart_port *port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600)
 		ret = -EINVAL;
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c
index 23d030511019..611c97a15654 100644
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -922,7 +922,7 @@ static void m32r_sio_config_port(struct uart_port *port, int flags)
 static int
 m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+	if (ser->irq >= nr_irqs || ser->irq < 0 ||
 	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
 	    ser->type >= ARRAY_SIZE(uart_config))
 		return -EINVAL;
@@ -1162,7 +1162,7 @@ static int __init m32r_sio_init(void)
 
 	printk(KERN_INFO "Serial: M32R SIO driver\n");
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		spin_lock_init(&irq_lists[i].lock);
 
 	ret = uart_register_driver(&m32r_sio_reg);
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 6bdf3362e3b1..874786a11fe9 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -741,7 +741,7 @@ static int uart_set_info(struct uart_state *state,
 	if (port->ops->verify_port)
 		retval = port->ops->verify_port(port, &new_serial);
 
-	if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) ||
+	if ((new_serial.irq >= nr_irqs) || (new_serial.irq < 0) ||
 	    (new_serial.baud_base < 9600))
 		retval = -EINVAL;
 
diff --git a/drivers/serial/serial_lh7a40x.c b/drivers/serial/serial_lh7a40x.c
index cb49a5ac022f..61dc8b3daa26 100644
--- a/drivers/serial/serial_lh7a40x.c
+++ b/drivers/serial/serial_lh7a40x.c
@@ -460,7 +460,7 @@ static int lh7a40xuart_verify_port (struct uart_port* port,
 
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X)
 		ret = -EINVAL;
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		ret = -EINVAL;
 	if (ser->baud_base < 9600) /* *** FIXME: is this true? */
 		ret = -EINVAL;
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3df2aaec829f..667b4b8fd074 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1157,7 +1157,7 @@ static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
 	struct sci_port *s = &sci_ports[port->line];
 
-	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > NR_IRQS)
+	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
 		return -EINVAL;
 	if (ser->baud_base < 2400)
 		/* No paper tape reader for Mitch.. */
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 5c5d18dcb6ac..503f0b99a3b8 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -1066,7 +1066,7 @@ static int qe_uart_verify_port(struct uart_port *port,
 	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
 		return -EINVAL;
 
-	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+	if (ser->irq < 0 || ser->irq >= nr_irqs)
 		return -EINVAL;
 
 	if (ser->baud_base < 9600)
-- 
cgit v1.2.3-55-g7522


From da27c118eb4f87f27ae8da2635b916daedf7264f Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:56 -0700
Subject: fs/proc: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/proc_misc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b675a49c1823..a2173a2a5625 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -509,7 +509,7 @@ static int show_stat(struct seq_file *p, void *v)
 	struct timespec boottime;
 	unsigned int *per_irq_sum;
 
-	per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
+	per_irq_sum = kzalloc(sizeof(unsigned int)*nr_irqs, GFP_KERNEL);
 	if (!per_irq_sum)
 		return -ENOMEM;
 
@@ -531,7 +531,7 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < NR_IRQS; j++) {
+		for (j = 0; j < nr_irqs; j++) {
 			unsigned int temp = kstat_cpu(i).irqs[j];
 			sum += temp;
 			per_irq_sum[j] += temp;
@@ -577,7 +577,7 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		seq_printf(p, " %u", per_irq_sum[i]);
 
 	seq_printf(p,
@@ -631,13 +631,13 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-	return (*pos <= NR_IRQS) ? pos : NULL;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
-	if (*pos > NR_IRQS)
+	if (*pos > nr_irqs)
 		return NULL;
 	return pos;
 }
-- 
cgit v1.2.3-55-g7522


From 5a15d7e85582fa84cbd01c6dcc5d927b43fddff4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:57 -0700
Subject: drivers/xen: use nr_irqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/events.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c3290bc186a0..ed8235187dc0 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -139,7 +139,7 @@ static void init_evtchn_cpu_bindings(void)
 #ifdef CONFIG_SMP
 	int i;
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		irq_desc[i].affinity = cpumask_of_cpu(0);
 #endif
 
@@ -229,12 +229,12 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for (irq = 0; irq < nr_irqs; irq++)
 		if (irq_bindcount[irq] == 0)
 			break;
 
-	if (irq == NR_IRQS)
-		panic("No available IRQ to bind to: increase NR_IRQS!\n");
+	if (irq == nr_irqs)
+		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
 	return irq;
 }
@@ -790,7 +790,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for (irq = 0; irq < NR_IRQS; irq++)
+	for (irq = 0; irq < nr_irqs; irq++)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -822,7 +822,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < NR_IRQS; i++)
+	for (i = 0; i < nr_irqs; i++)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
-- 
cgit v1.2.3-55-g7522


From eef1de76da54a2ab6c6659c9a3722fd54a0e3459 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:58 -0700
Subject: irqs: make irq_timer_state to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/random.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 31456472829e..1610aa64c7cf 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -559,7 +559,13 @@ struct timer_rand_state {
 };
 
 static struct timer_rand_state input_timer_state;
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct timer_rand_state **irq_timer_state;
+DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
+#else
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
+#endif
 
 /*
  * This function adds entropy to the entropy "pool" by using timing
-- 
cgit v1.2.3-55-g7522


From 5aeecaf4908499b1fd006d313ccbacde6a6bac43 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:49:59 -0700
Subject: irq: make irq2_iommu to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 980566e3352b..6961be807684 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -1,3 +1,4 @@
+#include <linux/interrupt.h>
 #include <linux/dmar.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
@@ -11,12 +12,19 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
-static struct {
+struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
 	u16 sub_handle;
 	u8  irte_mask;
-} irq_2_iommu[NR_IRQS];
+};
+
+#ifdef CONFIG_HAVE_DYNA_ARRAY
+static struct irq_2_iommu *irq_2_iommu;
+DEFINE_DYN_ARRAY(irq_2_iommu, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
+#else
+static struct irq_2_iommu irq_2_iommu[NR_IRQS];
+#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-- 
cgit v1.2.3-55-g7522


From d60458b224d6b997a582a05cb8c4b9bed9e17a1d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:00 -0700
Subject: irq: make irq_desc to use dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |  4 ++++
 kernel/irq/handle.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1d73d1abb834..5f4b013624dc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -179,7 +179,11 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
+extern struct irq_desc *irq_desc;
+#else
 extern struct irq_desc irq_desc[NR_IRQS];
+#endif
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e9d022cf593e..e94eeca09ea9 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -48,6 +48,36 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  * Controller mappings for all interrupt sources:
  */
 int nr_irqs = NR_IRQS;
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct irq_desc irq_desc_init __initdata = {
+	.status = IRQ_DISABLED,
+	.chip = &no_irq_chip,
+	.handle_irq = handle_bad_irq,
+	.depth = 1,
+	.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+	.affinity = CPU_MASK_ALL
+#endif
+};
+
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+	int i;
+	struct  irq_desc *desc;
+
+	desc = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
+}
+
+struct irq_desc *irq_desc;
+DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
+
+#else
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -60,6 +90,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 #endif
 	}
 };
+#endif
 
 /*
  * What should we do if we get a hw irq event on an illegal vector?
-- 
cgit v1.2.3-55-g7522


From fa42d10dd5e1ff373061c0526f272106512301f9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 19 Aug 2008 20:50:30 -0700
Subject: irq: sparse irqs, export nr_irqs

fix:

  Building modules, stage 2.
  MODPOST 458 modules
  ERROR: "nr_irqs" [drivers/serial/8250.ko] undefined!

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e94eeca09ea9..6ce3bcc2b8f7 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -48,6 +48,7 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  * Controller mappings for all interrupt sources:
  */
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct irq_desc irq_desc_init __initdata = {
-- 
cgit v1.2.3-55-g7522


From d17a55ded3393ad3878010bb3a8243a15a8d8df5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:01 -0700
Subject: irq: make irqs in kernel stat use per_cpu_dyn_array

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h | 4 ++++
 kernel/sched.c              | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..fe1f7fe534b4 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,11 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned int *irqs;
+#else
 	unsigned int irqs[NR_IRQS];
+#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0c..b9d713781b5b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4048,9 +4048,12 @@ static inline void idle_balance(int cpu, struct rq *rq)
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
-
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
+DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);
+#endif
+
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
  * that have not yet been banked in case the task is currently running.
-- 
cgit v1.2.3-55-g7522


From 301e619020dd67bde7e7e64bb9ffb7f30d26c979 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:02 -0700
Subject: x86: use dyn_array in io_apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 54 +++++++++++++++++++++++++++++++++++---------
 arch/x86/kernel/io_apic_64.c | 28 +++++++++++++++++------
 arch/x86/kernel/setup.c      |  6 +++++
 3 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index d382990244f0..7f2bcc3dad82 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -70,7 +70,7 @@ int timer_through_8259 __initdata;
  */
 int sis_apic_bug = -1;
 
-int first_free_entry = NR_IRQS;
+int first_free_entry;
 /*
  * # of IRQ routing registers
  */
@@ -98,10 +98,7 @@ static int disable_timer_pin_1 __initdata;
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-int pin_map_size = PIN_MAP_SIZE;
+int pin_map_size;
 
 /*
  * This is performance-critical, we want to do it O(1)
@@ -112,7 +109,9 @@ int pin_map_size = PIN_MAP_SIZE;
 
 static struct irq_pin_list {
 	int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} *irq_2_pin;
+
+DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL);
 
 struct io_apic {
 	unsigned int index;
@@ -403,9 +402,28 @@ static struct irq_cpu_info {
 
 #define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
 
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
-	[0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
+static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
+
+static cpumask_t *balance_irq_affinity;
+
+
+static void __init irq_affinity_init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	int i;
+	struct  balance_irq_affinity *affinity;
+
+	affinity = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		memcpy(&affinity[i], &balance_irq_affinity_init,
+			 sizeof(struct balance_irq_affinity));
+
+}
+
+DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
+
 
 void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
 {
@@ -1170,14 +1188,28 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR;
+static u8 *irq_vector;
+
+static void __init irq_vector_init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	u8 *irq_vec;
+
+	irq_vec = *da->name;
+
+	irq_vec[0] = irq_vector_init_first;
+}
+
+DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work);
 
 static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
 	int vector, offset;
 
-	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+	BUG_ON((unsigned)irq >= nr_irqs);
 
 	if (irq_vector[irq] > 0)
 		return irq_vector[irq];
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 448384c7c1e8..93a3ffabfe6a 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -66,7 +66,7 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
 	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
 	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@ -85,6 +85,17 @@ static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
+static struct irq_cfg *irq_cfg;
+
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+
+	memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+}
+
+DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+
 static int assign_irq_vector(int irq, cpumask_t mask);
 
 int first_system_vector = 0xfe;
@@ -129,10 +140,9 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
-int pin_map_size = PIN_MAP_SIZE;
+int pin_map_size;
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -141,8 +151,12 @@ int pin_map_size = PIN_MAP_SIZE;
  */
 
 static struct irq_pin_list {
-	short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+	short apic, pin;
+	int next;
+} *irq_2_pin;
+
+DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL);
+
 
 struct io_apic {
 	unsigned int index;
@@ -359,7 +373,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-int first_free_entry = NR_IRQS;
+int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_pin_list *entry = irq_2_pin + irq;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4b..558ec26b08e2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1067,9 +1067,15 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	prefill_possible_map();
+
 #ifdef CONFIG_X86_64
+	/* need to wait for nr_cpu_ids settle down */
+	if (nr_irqs == NR_IRQS)
+		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
+	pin_map_size = nr_irqs * 2;
+	first_free_entry = nr_irqs;
 
 	init_apic_mappings();
 	ioapic_init_mappings();
-- 
cgit v1.2.3-55-g7522


From a84488c213a8cfc29200344a6fb6357d48c8ed85 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 19 Aug 2008 20:50:31 -0700
Subject: irq: sparse irqs, fix #3

fix non-APIC UP build:

 arch/x86/kernel/built-in.o: In function `setup_arch':
 : undefined reference to `pin_map_size'
 arch/x86/kernel/built-in.o: In function `setup_arch':
 : undefined reference to `first_free_entry'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 558ec26b08e2..02e3a6697977 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1074,8 +1074,10 @@ void __init setup_arch(char **cmdline_p)
 		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
+#ifdef CONFIG_X86_IO_APIC
 	pin_map_size = nr_irqs * 2;
 	first_free_entry = nr_irqs;
+#endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
-- 
cgit v1.2.3-55-g7522


From 71f521bbaf375b685aeea20c6b0ed8600cd6edfe Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:03 -0700
Subject: x86, irq: get nr_irqs from madt

Until now, NR_IRQS was derived from black magic defines that had to
be "large enough" to both accomodate NR_CPUS and MAX_NR_IO_APICs.

This resulted in a way too large irq_desc[] array on most x86 systems.
Especially with larger CPU masks, the size of irq_desc can spiral out
of control quickly.

So be smarter about it and use precise allocation instead: determine the
default maximum possible IRQ number from the ACPI MADT. Use a minimum limit
of at least 32 IRQs for broken BIOSes.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 30 ++++++++++++++++++++++++++++--
 include/asm-x86/mpspec.h    |  1 +
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..3e9d163fd92f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -957,6 +957,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
+int get_nr_irqs_via_madt(void)
+{
+	int idx;
+	int nr = 0;
+
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		if (mp_ioapic_routing[idx].gsi_end > nr)
+			nr = mp_ioapic_routing[idx].gsi_end;
+	}
+
+	nr++;
+
+	/* double it for hotplug and msi and nmi */
+	nr <<= 1;
+
+	/* something wrong ? */
+	if (nr < 32)
+		nr = 32;
+
+	return nr;
+
+}
+
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
@@ -1254,9 +1277,12 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return count;
 	}
 
+
+	nr_irqs = get_nr_irqs_via_madt();
+
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1302,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index be2241a818f1..a0748021250b 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+extern int get_nr_irqs_via_madt(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
-- 
cgit v1.2.3-55-g7522


From bfea1238beac9d306eeac081c67de5ca6aec4c7a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:04 -0700
Subject: x86: remove nr_irq_vectors

remove unused defines derived from the (now obsolete) NR_IRQS define.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irq_vectors.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index c5d2d767a1f3..cb09802ce651 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -116,7 +116,6 @@
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
-# define NR_IRQ_VECTORS NR_IRQS
 
 #elif !defined(CONFIG_X86_VOYAGER)
 
@@ -124,23 +123,15 @@
 
 #  define NR_IRQS		224
 
-#  if (224 >= 32 * NR_CPUS)
-#   define NR_IRQ_VECTORS	NR_IRQS
-#  else
-#   define NR_IRQ_VECTORS	(32 * NR_CPUS)
-#  endif
-
 # else /* IO_APIC || PARAVIRT */
 
 #  define NR_IRQS		16
-#  define NR_IRQ_VECTORS	NR_IRQS
 
 # endif
 
 #else /* !VISWS && !VOYAGER */
 
 # define NR_IRQS		224
-# define NR_IRQ_VECTORS		NR_IRQS
 
 #endif /* VISWS */
 
-- 
cgit v1.2.3-55-g7522


From 08678b0841267c1d00d771fe01548d86043d065e Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:05 -0700
Subject: generic: sparse irqs: use irq_desc() together with dyn_array, instead
 of irq_desc[]

add CONFIG_HAVE_SPARSE_IRQ to for use condensed array.
Get rid of irq_desc[] array assumptions.

Preallocate 32 irq_desc, and irq_desc() will try to get more.

( No change in functionality is expected anywhere, except the odd build
  failure where we missed a code site or where a crossing commit itroduces
  new irq_desc[] usage. )

v2: according to Eric, change get_irq_desc() to irq_desc()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                        |   4 ++
 arch/x86/Kconfig                    |   1 +
 arch/x86/kernel/io_apic_32.c        |  46 ++++++++----
 arch/x86/kernel/io_apic_64.c        |  75 +++++++++++++-------
 arch/x86/kernel/irq_32.c            |  24 ++++---
 arch/x86/kernel/irq_64.c            |  35 ++++-----
 arch/x86/kernel/irqinit_64.c        |  10 +--
 arch/x86/kernel/visws_quirks.c      |  30 ++++----
 arch/x86/mach-voyager/voyager_smp.c |   4 +-
 drivers/gpio/gpiolib.c              |   2 +-
 drivers/mfd/asic3.c                 |   4 +-
 drivers/mfd/htc-egpio.c             |   2 +-
 drivers/parisc/dino.c               |   6 +-
 drivers/parisc/eisa.c               |   4 +-
 drivers/parisc/gsc.c                |  12 ++--
 drivers/parisc/iosapic.c            |   4 +-
 drivers/parisc/superio.c            |   4 +-
 drivers/pcmcia/hd64465_ss.c         |  12 +++-
 drivers/xen/events.c                |   8 ++-
 include/linux/irq.h                 |  32 ++++++---
 kernel/irq/autoprobe.c              |  10 +--
 kernel/irq/chip.c                   |  32 +++++----
 kernel/irq/handle.c                 | 138 ++++++++++++++++++++++++++++++++----
 kernel/irq/manage.c                 |  35 +++++----
 kernel/irq/migration.c              |  14 ++--
 kernel/irq/proc.c                   |  36 +++++-----
 kernel/irq/resend.c                 |   2 +-
 kernel/irq/spurious.c               |   5 +-
 28 files changed, 404 insertions(+), 187 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c1f9febb404f..b36762246265 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -105,3 +105,7 @@ config HAVE_CLK
 
 config HAVE_DYN_ARRAY
 	def_bool n
+
+config HAVE_SPARSE_IRQ
+	def_bool n
+
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 42f98009d752..1004888e9b13 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
+	select HAVE_SPARSE_IRQ if X86_64
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 7f2bcc3dad82..c2160cfdec9b 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	struct irq_pin_list *entry = irq_2_pin + irq;
 	unsigned int apicid_value;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, cpumask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -365,7 +366,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = irq_2_pin + entry->next;
 	}
-	irq_desc[irq].affinity = cpumask;
+	desc = irq_to_desc(irq);
+	desc->affinity = cpumask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -475,10 +477,12 @@ static inline void balance_irq(int cpu, int irq)
 static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 {
 	int i, j;
+	struct irq_desc *desc;
 
 	for_each_online_cpu(i) {
 		for (j = 0; j < nr_irqs; j++) {
-			if (!irq_desc[j].action)
+			desc = irq_to_desc(j);
+			if (!desc->action)
 				continue;
 			/* Is it a significant load ?  */
 			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
@@ -505,6 +509,7 @@ static void do_irq_balance(void)
 	unsigned long tmp_cpu_irq;
 	unsigned long imbalance = 0;
 	cpumask_t allowed_mask, target_cpu_mask, tmp;
+	struct irq_desc *desc;
 
 	for_each_possible_cpu(i) {
 		int package_index;
@@ -515,7 +520,8 @@ static void do_irq_balance(void)
 		for (j = 0; j < nr_irqs; j++) {
 			unsigned long value_now, delta;
 			/* Is this an active IRQ or balancing disabled ? */
-			if (!irq_desc[j].action || irq_balancing_disabled(j))
+			desc = irq_to_desc(j);
+			if (!desc->action || irq_balancing_disabled(j))
 				continue;
 			if (package_index == i)
 				IRQ_DELTA(package_index, j) = 0;
@@ -609,7 +615,8 @@ tryanotherirq:
 	selected_irq = -1;
 	for (j = 0; j < nr_irqs; j++) {
 		/* Is this an active IRQ? */
-		if (!irq_desc[j].action)
+		desc = irq_to_desc(j);
+		if (!desc->action)
 			continue;
 		if (imbalance <= IRQ_DELTA(max_loaded, j))
 			continue;
@@ -682,10 +689,12 @@ static int balanced_irq(void *unused)
 	int i;
 	unsigned long prev_balance_time = jiffies;
 	long time_remaining = balanced_irq_interval;
+	struct irq_desc *desc;
 
 	/* push everything to CPU 0 to give us a starting point.  */
 	for (i = 0 ; i < nr_irqs ; i++) {
-		irq_desc[i].pending_mask = cpumask_of_cpu(0);
+		desc = irq_to_desc(i);
+		desc->pending_mask = cpumask_of_cpu(0);
 		set_pending_irq(i, cpumask_of_cpu(0));
 	}
 
@@ -1254,13 +1263,16 @@ static struct irq_chip ioapic_chip;
 
 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL) {
-		irq_desc[irq].status |= IRQ_LEVEL;
+		desc->status |= IRQ_LEVEL;
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_fasteoi_irq, "fasteoi");
 	} else {
-		irq_desc[irq].status &= ~IRQ_LEVEL;
+		desc->status &= ~IRQ_LEVEL;
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
 	}
@@ -2027,6 +2039,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
+	struct irq_desc *desc;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -2048,9 +2061,11 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else
+			else {
+				desc = irq_to_desc(irq);
 				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
+				desc->chip = &no_irq_chip;
+			}
 		}
 	}
 }
@@ -2089,7 +2104,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq, int vector)
 {
-	irq_desc[irq].status &= ~IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 	set_intr_gate(vector, interrupt[irq]);
@@ -2556,6 +2574,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 	int vector;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2575,7 +2594,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2649,6 +2669,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2659,7 +2680,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(mask);
 
 	target_ht_irq(irq, dest);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 93a3ffabfe6a..cab5a25d81b1 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -361,9 +362,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif
@@ -933,14 +935,17 @@ static struct irq_chip ir_ioapic_chip;
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
 	if (trigger)
-		irq_desc[irq].status |= IRQ_LEVEL;
+		desc->status |= IRQ_LEVEL;
 	else
-		irq_desc[irq].status &= ~IRQ_LEVEL;
+		desc->status &= ~IRQ_LEVEL;
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		desc->status |= IRQ_MOVE_PCNTXT;
 		if (trigger)
 			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 						      handle_fasteoi_irq,
@@ -1596,10 +1601,10 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 
@@ -1616,6 +1621,8 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
 		__target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1637,12 +1644,13 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc->affinity = mask;
 }
 
 static int migrate_irq_remapped_level(int irq)
 {
 	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	mask_IO_APIC_irq(irq);
 
@@ -1658,11 +1666,11 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+	migrate_ioapic_irq(irq, desc->pending_mask);
 
 	ret = 0;
-	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(irq_desc[irq].pending_mask);
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq(irq);
@@ -1674,7 +1682,7 @@ static void ir_irq_migration(struct work_struct *work)
 	int irq;
 
 	for (irq = 0; irq < nr_irqs; irq++) {
-		struct irq_desc *desc = irq_desc + irq;
+		struct irq_desc *desc = irq_to_desc(irq);
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -1686,8 +1694,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq,
-					         irq_desc[irq].pending_mask);
+			desc->chip->set_affinity(irq, desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -1698,9 +1705,11 @@ static void ir_irq_migration(struct work_struct *work)
  */
 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	if (irq_desc[irq].status & IRQ_LEVEL) {
-		irq_desc[irq].status |= IRQ_MOVE_PENDING;
-		irq_desc[irq].pending_mask = mask;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
 		migrate_irq_remapped_level(irq);
 		return;
 	}
@@ -1725,7 +1734,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		if (irq >= nr_irqs)
 			continue;
 
-		desc = irq_desc + irq;
+		desc = irq_to_desc(irq);
 		cfg = irq_cfg + irq;
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
@@ -1791,7 +1800,7 @@ static void ack_apic_level(unsigned int irq)
 	irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_IO_APIC_irq(irq);
 	}
@@ -1868,6 +1877,7 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
+	struct irq_desc *desc;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1889,9 +1899,11 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else
+			else {
+				desc = irq_to_desc(irq);
 				/* Strange. Oh, well.. */
-				irq_desc[irq].chip = &no_irq_chip;
+				desc->chip = &no_irq_chip;
+			}
 		}
 	}
 }
@@ -1926,7 +1938,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-	irq_desc[irq].status &= ~IRQ_LEVEL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
 }
@@ -2402,6 +2417,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2421,7 +2437,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2435,6 +2452,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2469,7 +2487,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -2543,7 +2562,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_desc + irq;
+		struct irq_desc *desc = irq_to_desc(irq);
 		/*
 		 * irq migration in process context
 		 */
@@ -2655,6 +2674,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2674,7 +2694,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2731,6 +2752,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irq_cfg *cfg = irq_cfg + irq;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2743,7 +2765,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	irq_desc[irq].affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4c7ffb32854c..ede513be517d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -224,7 +224,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
 	int overflow, irq = ~regs->orig_ax;
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (unlikely((unsigned)irq >= nr_irqs)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
@@ -273,15 +273,16 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
+		struct irq_desc *desc = irq_to_desc(i);
 
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
 			any_count |= kstat_cpu(j).irqs[i];
 #endif
-		action = irq_desc[i].action;
+		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
 		seq_printf(p, "%3d: ",i);
@@ -291,8 +292,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
+		seq_printf(p, " %8s", desc->chip->name);
+		seq_printf(p, "-%-8s", desc->name);
 
 		if (action) {
 			seq_printf(p, "  %s", action->name);
@@ -302,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
@@ -398,17 +399,20 @@ void fixup_irqs(cpumask_t map)
 
 	for (irq = 0; irq < nr_irqs; irq++) {
 		cpumask_t mask;
+		struct irq_desc *desc;
+
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		desc = irq_to_desc(irq);
+		cpus_and(mask, desc->affinity, map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
 			mask = map;
 		}
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
-		else if (irq_desc[irq].action && !(warned++))
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
+		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index e1f0839430d2..738eb65a924e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,15 +83,16 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
+		struct irq_desc *desc = irq_to_desc(i);
 
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
 			any_count |= kstat_cpu(j).irqs[i];
 #endif
-		action = irq_desc[i].action;
+		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
 		seq_printf(p, "%3d: ",i);
@@ -101,8 +102,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
-		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
+		seq_printf(p, " %8s", desc->chip->name);
+		seq_printf(p, "-%-8s", desc->name);
 
 		if (action) {
 			seq_printf(p, "  %s", action->name);
@@ -111,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		}
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (i == nr_irqs) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
@@ -228,37 +229,39 @@ void fixup_irqs(cpumask_t map)
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
+		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
+		desc = irq_to_desc(irq);
 		/* interrupt's are disabled at this point */
-		spin_lock(&irq_desc[irq].lock);
+		spin_lock(&desc->lock);
 
 		if (!irq_has_action(irq) ||
-		    cpus_equal(irq_desc[irq].affinity, map)) {
-			spin_unlock(&irq_desc[irq].lock);
+		    cpus_equal(desc->affinity, map)) {
+			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, irq_desc[irq].affinity, map);
+		cpus_and(mask, desc->affinity, map);
 		if (cpus_empty(mask)) {
 			break_affinity = 1;
 			mask = map;
 		}
 
-		if (irq_desc[irq].chip->mask)
-			irq_desc[irq].chip->mask(irq);
+		if (desc->chip->mask)
+			desc->chip->mask(irq);
 
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (irq_desc[irq].chip->unmask)
-			irq_desc[irq].chip->unmask(irq);
+		if (desc->chip->unmask)
+			desc->chip->unmask(irq);
 
-		spin_unlock(&irq_desc[irq].lock);
+		spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
 			printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 165c5d9b0d1a..0744b49b4d12 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,9 +143,11 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < nr_irqs; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = NULL;
-		irq_desc[i].depth = 1;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
 
 		if (i < 16) {
 			/*
@@ -157,7 +159,7 @@ void __init init_ISA_irqs(void)
 			/*
 			 * 'high' PCI IRQs filled in on demand
 			 */
-			irq_desc[i].chip = &no_irq_chip;
+			desc->chip = &no_irq_chip;
 		}
 	}
 }
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..9d85ab384435 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
 static unsigned int startup_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+	if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+		desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
 	enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 	return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
 static void end_cobalt_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+	if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
 		enable_cobalt_irq(irq);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 }
@@ -626,7 +628,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
-	desc = irq_desc + realirq;
+	desc = irq_to_desc(realirq);
 
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
 	int i;
 
 	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = 0;
-		irq_desc[i].depth = 1;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = 0;
+		desc->depth = 1;
 
 		if (i == 0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE0) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_IDE1) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 		else if (i == CO_IRQ_8259) {
-			irq_desc[i].chip = &piix4_master_irq_type;
+			desc->chip = &piix4_master_irq_type;
 		}
 		else if (i < CO_IRQ_APIC0) {
-			irq_desc[i].chip = &piix4_virtual_irq_type;
+			desc->chip = &piix4_virtual_irq_type;
 		}
 		else if (IS_CO_APIC(i)) {
-			irq_desc[i].chip = &cobalt_irq_type;
+			desc->chip = &cobalt_irq_type;
 		}
 	}
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873c..0f6e8a6523ae 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
  * the interrupt off to another CPU */
 static void before_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 	__u8 cpu = smp_processor_id();
 
 	_raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
 /* Finish the VIC interrupt: basically mask */
 static void after_handle_vic_irq(unsigned int irq)
 {
-	irq_desc_t *desc = irq_desc + irq;
+	irq_desc_t *desc = irq_to_desc(irq);
 
 	_raw_spin_lock(&vic_irq_lock);
 	{
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 8d2940517c99..572d372899d3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1058,7 +1058,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 
 		if (!is_out) {
 			int		irq = gpio_to_irq(gpio);
-			struct irq_desc	*desc = irq_desc + irq;
+			struct irq_desc	*desc = irq_to_desc(irq);
 
 			/* This races with request_irq(), set_irq_type(),
 			 * and set_irq_wake() ... but those are "rare".
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index ba5aa2008273..e4c0db4dc7b1 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -123,7 +123,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 					irqnr = asic->irq_base +
 						(ASIC3_GPIOS_PER_BANK * bank)
 						+ i;
-					desc = irq_desc + irqnr;
+					desc = irq_to_desc(irqnr);
 					desc->handle_irq(irqnr, desc);
 					if (asic->irq_bothedge[bank] & bit)
 						asic3_irq_flip_edge(asic, base,
@@ -136,7 +136,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 		for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
 			/* They start at bit 4 and go up */
 			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
-				desc = irq_desc + asic->irq_base + i;
+				desc = irq_to_desc(asic->irq_base + i);
 				desc->handle_irq(asic->irq_base + i,
 						 desc);
 			}
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 6be43172dc65..ad3379fcd194 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -112,7 +112,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
 		/* Run irq handler */
 		pr_debug("got IRQ %d\n", irqpin);
 		irq = ei->irq_start + irqpin;
-		desc = &irq_desc[irq];
+		desc = irq_to_desc(irq);
 		desc->handle_irq(irq, desc);
 	}
 }
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index fd56128525d1..3bc54b30c3a1 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -298,7 +298,8 @@ struct pci_port_ops dino_port_ops = {
 
 static void dino_disable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
 	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq);
@@ -310,7 +311,8 @@ static void dino_disable_irq(unsigned int irq)
 
 static void dino_enable_irq(unsigned int irq)
 {
-	struct dino_device *dino_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct dino_device *dino_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 	u32 tmp;
 
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 771cef592542..7891db50c483 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -346,10 +346,10 @@ static int __init eisa_probe(struct parisc_device *dev)
 	}
 	
 	/* Reserve IRQ2 */
-	irq_desc[2].action = &irq2_action;
+	irq_to_desc(2)->action = &irq2_action;
 	
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &eisa_interrupt_type;
+		irq_to_desc(i)->chip = &eisa_interrupt_type;
 	}
 	
 	EISA_bus = 1;
diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
index f7d088b897ee..e76db9e4d504 100644
--- a/drivers/parisc/gsc.c
+++ b/drivers/parisc/gsc.c
@@ -108,7 +108,8 @@ int gsc_find_local_irq(unsigned int irq, int *global_irqs, int limit)
 
 static void gsc_asic_disable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -123,7 +124,8 @@ static void gsc_asic_disable_irq(unsigned int irq)
 
 static void gsc_asic_enable_irq(unsigned int irq)
 {
-	struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct gsc_asic *irq_dev = desc->chip_data;
 	int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
 	u32 imr;
 
@@ -159,12 +161,14 @@ static struct hw_interrupt_type gsc_asic_interrupt_type = {
 int gsc_assign_irq(struct hw_interrupt_type *type, void *data)
 {
 	static int irq = GSC_IRQ_BASE;
+	struct irq_desc *desc;
 
 	if (irq > GSC_IRQ_MAX)
 		return NO_IRQ;
 
-	irq_desc[irq].chip = type;
-	irq_desc[irq].chip_data = data;
+	desc = irq_to_desc(irq);
+	desc->chip = type;
+	desc->chip_data = data;
 	return irq++;
 }
 
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 6fb3f7979f21..7beffcab2745 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -619,7 +619,9 @@ iosapic_set_irt_data( struct vector_info *vi, u32 *dp0, u32 *dp1)
 
 static struct vector_info *iosapic_get_vector(unsigned int irq)
 {
-	return irq_desc[irq].chip_data;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc->chip_data;
 }
 
 static void iosapic_disable_irq(unsigned int irq)
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 1e8d2d17f04c..1e93c837514f 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -363,7 +363,9 @@ int superio_fixup_irq(struct pci_dev *pcidev)
 #endif
 
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].chip = &superio_interrupt_type;
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->chip = &superio_interrupt_type;
 	}
 
 	/*
diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
index 117dc12ab438..9ef69cdb3183 100644
--- a/drivers/pcmcia/hd64465_ss.c
+++ b/drivers/pcmcia/hd64465_ss.c
@@ -233,15 +233,18 @@ static struct hw_interrupt_type hd64465_ss_irq_type = {
  */
 static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 
+	desc = irq_to_desc(irq);
     	hs_mapped_irq[irq].sock = sp;
 	/* insert ourselves as the irq controller */
-	hs_mapped_irq[irq].old_handler = irq_desc[irq].chip;
-	irq_desc[irq].chip = &hd64465_ss_irq_type;
+	hs_mapped_irq[irq].old_handler = desc->chip;
+	desc->chip = &hd64465_ss_irq_type;
 }
 
 
@@ -250,13 +253,16 @@ static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
  */
 static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq)
 {
+	struct irq_desc *desc;
+
     	DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq);
 	
 	if (irq >= HS_NUM_MAPPED_IRQS)
 	    return;
 		
+	desc = irq_to_desc(irq);
 	/* restore the original irq controller */
-	irq_desc[irq].chip = hs_mapped_irq[irq].old_handler;
+	desc->chip = hs_mapped_irq[irq].old_handler;
 }
 
 /*============================================================*/
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ed8235187dc0..56ace47f24d6 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 
 	BUG_ON(irq == -1);
 #ifdef CONFIG_SMP
-	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+	irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
 #endif
 
 	__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
@@ -139,8 +139,10 @@ static void init_evtchn_cpu_bindings(void)
 #ifdef CONFIG_SMP
 	int i;
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < nr_irqs; i++)
-		irq_desc[i].affinity = cpumask_of_cpu(0);
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+		desc->affinity = cpumask_of_cpu(0);
+	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5f4b013624dc..80b8200f2adb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -152,6 +152,10 @@ struct irq_chip {
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+	unsigned int		irq;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc		*next;
+#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -179,9 +183,9 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-extern struct irq_desc *irq_desc;
-#else
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+#ifndef CONFIG_HAVE_DYN_ARRAY
+/* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
@@ -249,7 +253,10 @@ extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
 {
-	return irq_desc[irq].status & IRQ_NO_BALANCING_MASK;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	return desc->status & IRQ_NO_BALANCING_MASK;
 }
 
 /* Handle irq action chains: */
@@ -281,7 +288,7 @@ extern unsigned int __do_IRQ(unsigned int irq);
  */
 static inline void generic_handle_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
@@ -325,7 +332,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 static inline void __set_irq_handler_unlocked(int irq,
 					      irq_flow_handler_t handler)
 {
-	irq_desc[irq].handle_irq = handler;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->handle_irq = handler;
 }
 
 /*
@@ -359,7 +369,7 @@ extern void destroy_irq(unsigned int irq);
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	return desc->action != NULL;
 }
 
@@ -374,10 +384,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
-#define get_irq_chip(irq)	(irq_desc[irq].chip)
-#define get_irq_chip_data(irq)	(irq_desc[irq].chip_data)
-#define get_irq_data(irq)	(irq_desc[irq].handler_data)
-#define get_irq_msi(irq)	(irq_desc[irq].msi_desc)
+#define get_irq_chip(irq)	(irq_to_desc(irq)->chip)
+#define get_irq_chip_data(irq)	(irq_to_desc(irq)->chip_data)
+#define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
+#define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index c689e9851a80..c45ab718cf07 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -39,7 +39,7 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -69,7 +69,7 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -92,7 +92,7 @@ unsigned long probe_irq_on(void)
 	for (i = 0; i < nr_irqs; i++) {
 		unsigned int status;
 
-		desc = irq_desc + i;
+		desc = irq_to_desc(i);
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -131,7 +131,7 @@ unsigned int probe_irq_mask(unsigned long val)
 
 	mask = 0;
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		spin_lock_irq(&desc->lock);
@@ -174,7 +174,7 @@ int probe_irq_off(unsigned long val)
 	int i, irq_found = 0, nr_irqs = 0;
 
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		spin_lock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index bba66e098703..76c225cf4b26 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -33,7 +33,7 @@ void dynamic_irq_init(unsigned int irq)
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -65,7 +65,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -100,7 +100,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
@@ -126,7 +126,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 		return -ENODEV;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	if (type == IRQ_TYPE_NONE)
 		return 0;
 
@@ -155,7 +155,7 @@ int set_irq_data(unsigned int irq, void *data)
 		return -EINVAL;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -180,7 +180,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -198,9 +198,10 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 	unsigned long flags;
 
+	desc = irq_to_desc(irq);
 	if (irq >= nr_irqs || !desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
@@ -219,8 +220,9 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 
+	desc = irq_to_desc(irq);
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
 }
@@ -237,7 +239,10 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	irq_desc[irq].chip->enable(irq);
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->chip->enable(irq);
 
 	return 0;
 }
@@ -247,8 +252,9 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc;
 
+	desc = irq_to_desc(irq);
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
 }
@@ -551,7 +557,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	if (!handle)
 		handle = handle_bad_irq;
@@ -616,7 +622,7 @@ void __init set_irq_noprobe(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
@@ -634,7 +640,7 @@ void __init set_irq_probe(unsigned int irq)
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ce3bcc2b8f7..9fc33b3378e6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,14 @@
 
 #include "internals.h"
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+#endif
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -51,7 +59,8 @@ int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_desc irq_desc_init __initdata = {
+static struct irq_desc irq_desc_init = {
+	.irq = -1U,
 	.status = IRQ_DISABLED,
 	.chip = &no_irq_chip,
 	.handle_irq = handle_bad_irq,
@@ -62,6 +71,27 @@ static struct irq_desc irq_desc_init __initdata = {
 #endif
 };
 
+
+static void init_one_irq_desc(struct irq_desc *desc)
+{
+	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+#ifdef CONFIG_TRACE_IRQFLAGS
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+#endif
+}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static int nr_irq_desc = 32;
+
+static int __init parse_nr_irq_desc(char *arg)
+{
+	if (arg)
+		nr_irq_desc = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("nr_irq_desc", parse_nr_irq_desc);
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -71,12 +101,83 @@ static void __init init_work(void *data)
 	desc = *da->name;
 
 	for (i = 0; i < *da->nr; i++)
-		memcpy(&desc[i], &irq_desc_init, sizeof(struct irq_desc));
+		init_one_irq_desc(&desc[i]);
+
+	for (i = 1; i < *da->nr; i++)
+		desc[i-1].next = &desc[i];
 }
 
-struct irq_desc *irq_desc;
+static struct irq_desc *sparse_irqs;
+DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
+
+extern int after_bootmem;
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	struct irq_desc *desc, *desc_pri;
+	int i;
+	int count = 0;
+
+	BUG_ON(irq == -1U);
+
+	desc_pri = desc = &sparse_irqs[0];
+	while (desc) {
+		if (desc->irq == irq)
+			return desc;
+
+		if (desc->irq == -1U) {
+			desc->irq = irq;
+			return desc;
+		}
+		desc_pri = desc;
+		desc = desc->next;
+		count++;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
+
+	if (after_bootmem)
+		desc = kzalloc(sizeof(struct irq_desc)*nr_irq_desc, GFP_ATOMIC);
+	else
+		desc = __alloc_bootmem_nopanic(sizeof(struct irq_desc)*nr_irq_desc, PAGE_SIZE, 0);
+
+	if (!desc)
+		panic("please boot with nr_irq_desc= %d\n", count * 2);
+
+	for (i = 0; i < nr_irq_desc; i++)
+		init_one_irq_desc(&desc[i]);
+
+	for (i = 1; i < nr_irq_desc; i++)
+		desc[i-1].next = &desc[i];
+
+	desc->irq = irq;
+	desc_pri->next = desc;
+
+	return desc;
+}
+#else
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+	int i;
+	struct  irq_desc *desc;
+
+	desc = *da->name;
+
+	for (i = 0; i < *da->nr; i++)
+		init_one_irq_desc(&desc[i]);
+
+}
+static struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
+#endif
+
 #else
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
@@ -85,12 +186,23 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+		.lock = __SPIN_LOCK_UNLOCKED(sparse_irqs->lock),
 #ifdef CONFIG_SMP
 		.affinity = CPU_MASK_ALL
 #endif
 	}
 };
+
+#endif
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_desc[irq];
+
+	return NULL;
+}
 #endif
 
 /*
@@ -99,7 +211,10 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	print_irq_desc(irq, irq_desc + irq);
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -196,7 +311,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
  */
 unsigned int __do_IRQ(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned int status;
 
@@ -287,19 +402,16 @@ out:
 }
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 void early_init_irq_lock_class(void)
 {
+#ifndef CONFIG_HAVE_DYN_ARRAY
 	int i;
 
 	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+#endif
 }
-
 #endif
+
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d5a4333d8f1f..b5943e9f95aa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,7 +31,7 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned int status;
 
 	if (irq >= nr_irqs)
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq);
  */
 int irq_can_set_affinity(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
 	    !desc->chip->set_affinity)
@@ -81,7 +81,7 @@ int irq_can_set_affinity(unsigned int irq)
  */
 int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
@@ -111,14 +111,16 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 int irq_select_affinity(unsigned int irq)
 {
 	cpumask_t mask;
+	struct irq_desc *desc;
 
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
 	cpus_and(mask, cpu_online_map, irq_default_affinity);
 
-	irq_desc[irq].affinity = mask;
-	irq_desc[irq].chip->set_affinity(irq, mask);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+	desc->chip->set_affinity(irq, mask);
 
 	set_balance_irq_affinity(irq, mask);
 	return 0;
@@ -140,7 +142,7 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	if (irq >= nr_irqs)
@@ -169,7 +171,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (irq >= nr_irqs)
 		return;
@@ -211,7 +213,7 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	if (irq >= nr_irqs)
@@ -225,7 +227,7 @@ EXPORT_SYMBOL(enable_irq);
 
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	int ret = -ENXIO;
 
 	if (desc->chip->set_wake)
@@ -248,7 +250,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
  */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = 0;
 
@@ -288,12 +290,13 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 
-	if (irq >= nr_irqs || irq_desc[irq].status & IRQ_NOREQUEST)
+	if (irq >= nr_irqs || desc->status & IRQ_NOREQUEST)
 		return 0;
 
-	action = irq_desc[irq].action;
+	action = desc->action;
 	if (action)
 		if (irqflags & action->flags & IRQF_SHARED)
 			action = NULL;
@@ -349,7 +352,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
@@ -518,7 +521,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	if (irq >= nr_irqs)
 		return;
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	p = &desc->action;
 	for (;;) {
@@ -615,6 +618,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 {
 	struct irqaction *action;
 	int retval;
+	struct irq_desc *desc;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -632,7 +636,8 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		return -EINVAL;
 	if (irq >= nr_irqs)
 		return -EINVAL;
-	if (irq_desc[irq].status & IRQ_NOREQUEST)
+	desc = irq_to_desc(irq);
+	if (desc->status & IRQ_NOREQUEST)
 		return -EINVAL;
 	if (!handler)
 		return -EINVAL;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 77b7acc875c5..90b920d3f52b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -3,18 +3,18 @@
 
 void set_pending_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_MOVE_PENDING;
-	irq_desc[irq].pending_mask = mask;
+	desc->pending_mask = mask;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void move_masked_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
@@ -30,7 +30,7 @@ void move_masked_irq(int irq)
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
+	if (unlikely(cpus_empty(desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -38,7 +38,7 @@ void move_masked_irq(int irq)
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
+	cpus_and(tmp, desc->pending_mask, cpu_online_map);
 
 	/*
 	 * If there was a valid mask to work with, please
@@ -55,12 +55,12 @@ void move_masked_irq(int irq)
 	if (likely(!cpus_empty(tmp))) {
 		desc->chip->set_affinity(irq,tmp);
 	}
-	cpus_clear(irq_desc[irq].pending_mask);
+	cpus_clear(desc->pending_mask);
 }
 
 void move_native_irq(int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index e5225a65a4f9..c2f356c808f6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
 
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-	struct irq_desc *desc = irq_desc + (long)m->private;
+	struct irq_desc *desc = irq_to_desc((long)m->private);
 	cpumask_t *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	cpumask_t new_value;
 	int err;
 
-	if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
+	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = {
 static int irq_spurious_read(char *page, char **start, off_t off,
 				  int count, int *eof, void *data)
 {
-	struct irq_desc *d = &irq_desc[(long) data];
+	struct irq_desc *desc = irq_to_desc((long) data);
 	return sprintf(page, "count %u\n"
 			     "unhandled %u\n"
 			     "last_unhandled %u ms\n",
-			d->irq_count,
-			d->irqs_unhandled,
-			jiffies_to_msecs(d->last_unhandled));
+			desc->irq_count,
+			desc->irqs_unhandled,
+			jiffies_to_msecs(desc->last_unhandled));
 }
 
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned long flags;
 	int ret = 1;
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 void register_handler_proc(unsigned int irq, struct irqaction *action)
 {
 	char name [MAX_NAMELEN];
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (!irq_desc[irq].dir || action->dir || !action->name ||
+	if (!desc->dir || action->dir || !action->name ||
 					!name_unique(irq, action))
 		return;
 
@@ -174,7 +175,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 	snprintf(name, MAX_NAMELEN, "%s", action->name);
 
 	/* create /proc/irq/1234/handler/ */
-	action->dir = proc_mkdir(name, irq_desc[irq].dir);
+	action->dir = proc_mkdir(name, desc->dir);
 }
 
 #undef MAX_NAMELEN
@@ -185,25 +186,24 @@ void register_irq_proc(unsigned int irq)
 {
 	char name [MAX_NAMELEN];
 	struct proc_dir_entry *entry;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (!root_irq_dir ||
-		(irq_desc[irq].chip == &no_irq_chip) ||
-			irq_desc[irq].dir)
+	if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
 		return;
 
 	memset(name, 0, MAX_NAMELEN);
 	sprintf(name, "%d", irq);
 
 	/* create /proc/irq/1234 */
-	irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
+	desc->dir = proc_mkdir(name, root_irq_dir);
 
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
-	proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
+	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
 #endif
 
-	entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+	entry = create_proc_entry("spurious", 0444, desc->dir);
 	if (entry) {
 		entry->data = (void *)(long)irq;
 		entry->read_proc = irq_spurious_read;
@@ -214,8 +214,10 @@ void register_irq_proc(unsigned int irq)
 
 void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
-	if (action->dir)
-		remove_proc_entry(action->dir->name, irq_desc[irq].dir);
+	if (action->dir) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		remove_proc_entry(action->dir->name, desc->dir);
+	}
 }
 
 void register_default_affinity_proc(void)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index cba8aa5bc7f4..89c7117acf2b 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -36,7 +36,7 @@ static void resend_irqs(unsigned long arg)
 	while (!bitmap_empty(irqs_resend, nr_irqs)) {
 		irq = find_first_bit(irqs_resend, nr_irqs);
 		clear_bit(irq, irqs_resend);
-		desc = irq_desc + irq;
+		desc = irq_to_desc(irq);
 		local_irq_disable();
 		desc->handle_irq(irq, desc);
 		local_irq_enable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e26ca1e90c08..b5d906002e1d 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -92,11 +92,12 @@ static int misrouted_irq(int irq)
 	int ok = 0;
 
 	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc;
 
 		if (i == irq)	/* Already tried */
 			continue;
 
+		desc = irq_to_desc(i);
 		if (try_one_irq(i, desc))
 			ok = 1;
 	}
@@ -108,7 +109,7 @@ static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
 	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
 		/* Racy but it doesn't matter */
-- 
cgit v1.2.3-55-g7522


From 3bf52a4df3ccd25d4154797977c556a2a8b3bc1e Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 19 Aug 2008 20:50:29 -0700
Subject: irq: sparse irqs, fix IRQ auto-probe crash

fix:

[   10.631533] calling  yenta_socket_init+0x0/0x20
[   10.631533] Yenta: CardBus bridge found at 0000:15:00.0 [17aa:2012]
[   10.631533] Yenta: Using INTVAL to route CSC interrupts to PCI
[   10.631533] Yenta: Routing CardBus interrupts to PCI
[   10.631533] Yenta TI: socket 0000:15:00.0, mfunc 0x01d01002, devctl 0x64
[   10.731599] BUG: unable to handle kernel NULL pointer dereference at 00000040
[   10.731838] IP: [<c0c95b5f>] _spin_lock_irq+0xf/0x20
[   10.732221] *pde = 00000000
[   10.732741] Oops: 0002 [#1] SMP
[   10.733453]
[   10.734253] Pid: 1, comm: swapper Tainted: G        W (2.6.27-rc3-tip-00173-gd7eaa4f-dirty #1)
[   10.735188] EIP: 0060:[<c0c95b5f>] EFLAGS: 00010002 CPU: 0
[   10.735523] EIP is at _spin_lock_irq+0xf/0x20
[   10.735523] EAX: 00000040 EBX: 00000000 ECX: f6e04c90 EDX: 00000100
[   10.735523] ESI: 000000df EDI: f6e04c90 EBP: f7867df0 ESP: f7867df0
[   10.735523]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[   10.735523] Process swapper (pid: 1, ti=f7867000 task=f7870000 task.ti=f7867000)
[   10.735523] Stack: f7867e04 c0155fbd 00000000 00000000 f6e04c90 f7867e5c c0c6e319 c0f6a074
[   10.735523]        f6e04c90 000017aa 00002012 c112b648 f791f240 c112b5e0 f7867e44 c010440b
[   10.735523]        f791f240 f791f29c c112b8ec f791f240 00000000 f7867e5c c048f893 03c0b648
[   10.735523] Call Trace:
[   10.735523]  [<c0155fbd>] ? probe_irq_on+0x3d/0x140
[   10.735523]  [<c0c6e319>] ? yenta_probe+0x529/0x640
[   10.735523]  [<c010440b>] ? mcount_call+0x5/0xa
[   10.735523]  [<c048f893>] ? pci_match_device+0xa3/0xb0
[   10.735523]  [<c048fc1e>] ? pci_device_probe+0x5e/0x80
[   10.735523]  [<c0515423>] ? driver_probe_device+0x83/0x180
[   10.735523]  [<c0515594>] ? __driver_attach+0x74/0x80
[   10.735523]  [<c0514b69>] ? bus_for_each_dev+0x49/0x70
[   10.735523]  [<c051528e>] ? driver_attach+0x1e/0x20
[   10.735523]  [<c0515520>] ? __driver_attach+0x0/0x80
[   10.735523]  [<c05150d3>] ? bus_add_driver+0x1a3/0x220
[   10.735523]  [<c048fb60>] ? pci_device_remove+0x0/0x40
[   10.735523]  [<c05157f4>] ? driver_register+0x54/0x130
[   10.735523]  [<c048fe2f>] ? __pci_register_driver+0x4f/0x90
[   10.735523]  [<c11e9419>] ? yenta_socket_init+0x19/0x20
[   10.735523]  [<c0101125>] ? do_one_initcall+0x35/0x160
[   10.735523]  [<c11e9400>] ? yenta_socket_init+0x0/0x20
[   10.735523]  [<c01391a6>] ? __queue_work+0x36/0x50
[   10.735523]  [<c013922d>] ? queue_work_on+0x3d/0x50
[   10.735523]  [<c11a2758>] ? kernel_init+0x148/0x210
[   10.735523]  [<c11a2610>] ? kernel_init+0x0/0x210
[   10.735523]  [<c01043f3>] ? kernel_thread_helper+0x7/0x10
[   10.735523]  =======================
[   10.735523] Code: 10 38 f2 74 06 f3 90 8a 10 eb f6 5d 89 c8 c3 8d b6 00 00 00 00 8d bc 27 00 00 00 00 55 89 e5 e8 a4 e8 46 ff fa ba 00 01 00 00 90 <66> 0f c1 10 38 f2 74 06 f3 90 8a 10 eb f6 5d c3 90 55 89 e5 53

as auto-probing wants to iterate over existing irqs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/autoprobe.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index c45ab718cf07..b3a5549ea81e 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,6 +40,8 @@ unsigned long probe_irq_on(void)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -70,6 +72,8 @@ unsigned long probe_irq_on(void)
 	 */
 	for (i = nr_irqs-1; i > 0; i--) {
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
@@ -93,6 +97,8 @@ unsigned long probe_irq_on(void)
 		unsigned int status;
 
 		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -134,6 +140,8 @@ unsigned int probe_irq_mask(unsigned long val)
 		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -177,6 +185,8 @@ int probe_irq_off(unsigned long val)
 		struct irq_desc *desc = irq_to_desc(i);
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
-- 
cgit v1.2.3-55-g7522


From 3ac2de48ed3c998df7f366e039c97eedb27e7c3d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:06 -0700
Subject: x86: add irq_cfg in io_apic_64.c

preallocate size is 32, and if it is not enough, irq_cfg will more
via alloc_bootmem() or kzalloc(). (depending on how early we are in
system setup)

v2: fix typo about size of init_one_irq_cfg ... should use sizeof(struct irq_cfg)
v3: according to Eric, change get_irq_cfg() to irq_cfg()
v4: squash add irq_cfg_alloc in

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 209 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 169 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index cab5a25d81b1..858c37a31a2f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -57,7 +57,11 @@
 
 #define __apicdebuginit(type) static type __init
 
+struct irq_cfg;
+
 struct irq_cfg {
+	unsigned int irq;
+	struct irq_cfg *next;
 	cpumask_t domain;
 	cpumask_t old_domain;
 	unsigned move_cleanup_count;
@@ -67,34 +71,132 @@ struct irq_cfg {
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-static struct irq_cfg *irq_cfg;
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
 
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
+	struct irq_cfg *cfg;
+	int i;
 
-	memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+	cfg = *da->name;
+
+	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+	i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (; i < *da->nr; i++)
+		init_one_irq_cfg(&cfg[i]);
+
+	for (i = 1; i < *da->nr; i++)
+		cfg[i-1].next = &cfg[i];
 }
 
-DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+static struct irq_cfg *irq_cfgx;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg;
+
+	BUG_ON(irq == -1U);
+
+	cfg = &irq_cfgx[0];
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		if (cfg->irq == -1U)
+			return NULL;
+
+		cfg = cfg->next;
+	}
+
+	return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	struct irq_cfg *cfg, *cfg_pri;
+	int i;
+	int count = 0;
+
+	BUG_ON(irq == -1U);
+
+	cfg_pri = cfg = &irq_cfgx[0];
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		if (cfg->irq == -1U) {
+			cfg->irq = irq;
+			return cfg;
+		}
+		cfg_pri = cfg;
+		cfg = cfg->next;
+		count++;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+	if (after_bootmem)
+		cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
+	else
+		cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
+
+	if (!cfg)
+		panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+	for (i = 0; i < nr_irq_cfg; i++)
+		init_one_irq_cfg(&cfg[i]);
+
+	for (i = 1; i < nr_irq_cfg; i++)
+		cfg[i-1].next = &cfg[i];
+
+	cfg->irq = irq;
+	cfg_pri->next = cfg;
+
+	return cfg;
+}
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
@@ -341,7 +443,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -381,6 +483,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
 	BUG_ON(irq >= nr_irqs);
+	irq_cfg_alloc(irq);
+
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -819,7 +923,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	struct irq_cfg *cfg;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
@@ -893,7 +997,7 @@ static void __clear_irq_vector(int irq)
 	int cpu, vector;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
-	cfg = &irq_cfg[irq];
+	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -913,17 +1017,23 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for (irq = 0; irq < nr_irqs; ++irq) {
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+		struct irq_cfg *cfg = irq_cfg(irq);
+
+		if (!cpu_isset(cpu, cfg->domain))
 			continue;
-		vector = irq_cfg[irq].vector;
+		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		struct irq_cfg *cfg;
+
 		irq = per_cpu(vector_irq, cpu)[vector];
 		if (irq < 0)
 			continue;
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -1029,13 +1139,15 @@ static int setup_ioapic_entry(int apic, int irq,
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			      int trigger, int polarity)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	cpumask_t mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
+	cfg = irq_cfg(irq);
+
 	mask = TARGET_CPUS;
 	if (assign_irq_vector(irq, mask))
 		return;
@@ -1553,7 +1665,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	struct irq_cfg *cfg = &irq_cfg[irq];
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
@@ -1600,7 +1712,7 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  */
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
@@ -1618,6 +1730,7 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -1735,7 +1848,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			continue;
 
 		desc = irq_to_desc(irq);
-		cfg = irq_cfg + irq;
+		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
 			goto unlock;
@@ -1754,7 +1867,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -1891,7 +2004,10 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+		struct irq_cfg *cfg;
+
+		cfg = irq_cfg(irq);
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2028,7 +2144,7 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg + 0;
+	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -2306,14 +2422,19 @@ int create_irq(void)
 	int irq;
 	int new;
 	unsigned long flags;
+	struct irq_cfg *cfg_new;
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_cfg[new].vector != 0)
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		/* check if need to create one */
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
@@ -2346,7 +2467,7 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
 	cpumask_t tmp;
@@ -2356,6 +2477,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	if (err)
 		return err;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2413,7 +2535,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -2426,6 +2548,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2448,7 +2571,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
@@ -2464,6 +2587,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2670,7 +2794,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -2683,6 +2807,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2749,7 +2874,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 	struct irq_desc *desc;
@@ -2761,6 +2886,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (assign_irq_vector(irq, mask))
 		return;
 
+	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -2783,7 +2909,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_cfg *cfg;
 	int err;
 	cpumask_t tmp;
 
@@ -2793,6 +2919,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		struct ht_irq_msg msg;
 		unsigned dest;
 
+		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -2891,6 +3018,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -2906,7 +3034,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			if (!irq_cfg[irq].vector)
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
-- 
cgit v1.2.3-55-g7522


From e5a53714acfc7b5f868d07d27c5f02cb00b118db Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:07 -0700
Subject: x86: put irq_2_pin pointer into irq_cfg

preallocate 32 irq_2_pin entries, and use get_one_free_irq_2_pin() to get
one more and link to irq_cfg if needed.

so don't waste one where no irq is enabled.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 161 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 120 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 858c37a31a2f..51ef7eb75f2e 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -58,10 +58,11 @@
 #define __apicdebuginit(type) static type __init
 
 struct irq_cfg;
-
+struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
+	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
 	unsigned move_cleanup_count;
@@ -252,13 +253,66 @@ int pin_map_size;
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-	short apic, pin;
-	int next;
-} *irq_2_pin;
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = *da->name;
+
+	for (i = 1; i < *da->nr; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = irq_2_pin_ptr;
+
+	if (pin) {
+		irq_2_pin_ptr = pin->next;
+		pin->next = NULL;
+		return pin;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+	if (after_bootmem)
+		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+				 GFP_ATOMIC);
+	else
+		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+				nr_irq_2_pin, PAGE_SIZE, 0);
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");
 
-DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL);
+	for (i = 1; i < nr_irq_2_pin; i++)
+		pin[i-1].next = &pin[i];
 
+	irq_2_pin_ptr = pin->next;
+	pin->next = NULL;
+
+	return pin;
+}
 
 struct io_apic {
 	unsigned int index;
@@ -300,16 +354,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = irq_2_pin + irq;
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
 		int pin;
 
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
 		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -318,7 +373,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 		}
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -339,21 +394,24 @@ static inline void io_apic_sync(unsigned int apic)
 									\
 {									\
 	int pin;							\
-	struct irq_pin_list *entry = irq_2_pin + irq;			\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
 									\
 	BUG_ON(irq >= nr_irqs);						\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
 	for (;;) {							\
 		unsigned int reg;					\
-		pin = entry->pin;					\
-		if (pin == -1)						\
+		if (!entry)						\
 			break;						\
+		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
 		reg ACTION;						\
 		io_apic_modify(entry->apic, reg);			\
 		FINAL;							\
 		if (!entry->next)					\
 			break;						\
-		entry = irq_2_pin + entry->next;			\
+		entry = entry->next;					\
 	}								\
 }
 
@@ -416,15 +474,20 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	int apic, pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 
 	BUG_ON(irq >= nr_irqs);
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
+
+		if (!entry)
+			break;
+
 		apic = entry->apic;
 		pin = entry->pin;
-		if (pin == -1)
-			break;
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
@@ -437,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		io_apic_modify(apic, reg);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 }
 
@@ -480,22 +543,35 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 
 	BUG_ON(irq >= nr_irqs);
-	irq_cfg_alloc(irq);
+	/* first time to refer irq_cfg, so with new */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+		return;
+	}
 
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
+	while (entry->next) {
+		/* not again, please */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
 
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= pin_map_size)
-			panic("io_apic.c: ran out of irq_2_pin entries!");
+		entry = entry->next;
 	}
+
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
+	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
@@ -505,17 +581,24 @@ static void __init replace_pin_at_irq(unsigned int irq,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
 
-	while (1) {
+	while (entry) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-		}
-		if (!entry->next)
+			replaced = 1;
+			/* every one is different, right? */
 			break;
-		entry = irq_2_pin + entry->next;
+		}
+		entry = entry->next;
 	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
 }
 
 
@@ -1326,15 +1409,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 	for (i = 0; i < nr_irqs; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
+		struct irq_cfg *cfg = irq_cfg(i);
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", i);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
 				break;
-			entry = irq_2_pin + entry->next;
+			entry = entry->next;
 		}
 		printk("\n");
 	}
@@ -1495,14 +1579,9 @@ void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
-	int i, apic;
+	int apic;
 	unsigned long flags;
 
-	for (i = 0; i < pin_map_size; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
-
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
 	 */
-- 
cgit v1.2.3-55-g7522


From 3060d6fe28570640c2d7d66d38b9eaa848c3b9e3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:08 -0700
Subject: x86: put timer_rand_state pointer into irq_desc

irq_timer_state[] is a NR_IRQS sized array that is a side-by array to
the real irq_desc[] array.

Integrate that field into the (now dynamic) irq_desc dynamic array and
save some RAM.

v2: keep the old way to support arch not support irq_desc

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/random.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/irq.h   |  2 ++
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1610aa64c7cf..60c9c7ee6b2c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,7 +558,7 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state input_timer_state;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
 
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct timer_rand_state **irq_timer_state;
@@ -567,6 +567,51 @@ DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PA
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
 #endif
 
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return irq_timer_state[irq];
+}
+
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	if (irq >= nr_irqs)
+		return;
+
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return NULL;
+
+	return desc->timer_rand_state;
+}
+
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return;
+
+	desc->timer_rand_state = state;
+}
+#endif
+
+static struct timer_rand_state input_timer_state;
+
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -654,11 +699,15 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-	if (irq >= nr_irqs || irq_timer_state[irq] == NULL)
+	struct timer_rand_state *state;
+
+	state = get_timer_rand_state(irq);
+
+	if (state == NULL)
 		return;
 
 	DEBUG_ENT("irq event %d\n", irq);
-	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
+	add_timer_randomness(state, 0x100 + irq);
 }
 
 #ifdef CONFIG_BLOCK
@@ -918,7 +967,14 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-	if (irq >= nr_irqs || irq_timer_state[irq])
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+	if (irq >= nr_irqs)
+		return;
+#endif
+
+	state = get_timer_rand_state(irq);
+
+	if (state)
 		return;
 
 	/*
@@ -927,7 +983,7 @@ void rand_initialize_irq(int irq)
 	 */
 	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
 	if (state)
-		irq_timer_state[irq] = state;
+		set_timer_rand_state(irq, state);
 }
 
 #ifdef CONFIG_BLOCK
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80b8200f2adb..60c856aaac0f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -127,6 +127,7 @@ struct irq_chip {
 	const char	*typename;
 };
 
+struct timer_rand_state;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -155,6 +156,7 @@ struct irq_desc {
 	unsigned int		irq;
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_desc		*next;
+	struct timer_rand_state *timer_rand_state;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
-- 
cgit v1.2.3-55-g7522


From 7f95ec9e4c12fd067febfd57532da1166d75d858 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:09 -0700
Subject: x86: move kstat_irqs from kstat to irq_desc

based on Eric's patch ...

together mold it with dyn_array for irq_desc, will allcate kstat_irqs for
nr_irq_desc alltogether if needed. -- at that point nr_cpus is known already.

v2: make sure system without generic_hardirqs works they don't have irq_desc
v3: fix merging
v4: [mingo@elte.hu] fix typo

[ mingo@elte.hu ] irq: build fix

fix:

 arch/x86/xen/spinlock.c: In function 'xen_spin_lock_slow':
 arch/x86/xen/spinlock.c:90: error: 'struct kernel_stat' has no member named 'irqs'

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c   |  2 +-
 arch/x86/kernel/irq_32.c       |  4 +-
 arch/x86/kernel/irq_64.c       |  4 +-
 arch/x86/kernel/visws_quirks.c |  2 +-
 arch/x86/xen/spinlock.c        |  2 +-
 fs/proc/proc_misc.c            |  2 +-
 include/linux/irq.h            |  7 +++
 include/linux/kernel_stat.h    | 22 +++++++---
 kernel/irq/chip.c              | 15 +++----
 kernel/irq/handle.c            | 97 ++++++++++++++++++++++++++++++------------
 kernel/sched.c                 |  5 +--
 11 files changed, 106 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index c2160cfdec9b..204884b1415a 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -526,7 +526,7 @@ static void do_irq_balance(void)
 			if (package_index == i)
 				IRQ_DELTA(package_index, j) = 0;
 			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_cpu(i).irqs[j];
+			value_now = (unsigned long) kstat_irqs_cpu(j, i);
 
 			/* Determine the activity per processor per IRQ */
 			delta = value_now - LAST_CPU_IRQ(i, j);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index ede513be517d..576c5df6cad8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -280,7 +280,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
+			any_count |= kstat_irqs_cpu(i, j);
 #endif
 		action = desc->action;
 		if (!action && !any_count)
@@ -290,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
 		seq_printf(p, " %8s", desc->chip->name);
 		seq_printf(p, "-%-8s", desc->name);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 738eb65a924e..4a0a4eb44dcb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		any_count = kstat_irqs(i);
 #else
 		for_each_online_cpu(j)
-			any_count |= kstat_cpu(j).irqs[i];
+			any_count |= kstat_irqs_cpu(i, j);
 #endif
 		action = desc->action;
 		if (!action && !any_count)
@@ -100,7 +100,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
 		seq_printf(p, " %8s", desc->chip->name);
 		seq_printf(p, "-%-8s", desc->name);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 9d85ab384435..817aa55a1209 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_cpu(smp_processor_id()).irqs[realirq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	if (likely(desc->action != NULL))
 		handle_IRQ_event(realirq, desc->action);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index dd71e3a021cd..bb6bc721b13d 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(irq_to_desc(irq))++;
 
 out:
 	raw_local_irq_restore(flags);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a2173a2a5625..aa069acf61a0 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -532,7 +532,7 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 		for (j = 0; j < nr_irqs; j++) {
-			unsigned int temp = kstat_cpu(i).irqs[j];
+			unsigned int temp = kstat_irqs_cpu(j, i);
 			sum += temp;
 			per_irq_sum[j] += temp;
 		}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 60c856aaac0f..cbf471aee1ce 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -157,6 +157,11 @@ struct irq_desc {
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_desc		*next;
 	struct timer_rand_state *timer_rand_state;
+#endif
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned int            *kstat_irqs;
+#else
+	unsigned int            kstat_irqs[NR_CPUS];
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
@@ -190,6 +195,8 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
+#define kstat_irqs_this_cpu(DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()])
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index fe1f7fe534b4..f10616712de5 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,10 +28,8 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned int *irqs;
-#else
-	unsigned int irqs[NR_IRQS];
+#ifndef CONFIG_GENERIC_HARDIRQS
+       unsigned int irqs[NR_IRQS];
 #endif
 };
 
@@ -43,15 +41,25 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+       return kstat_cpu(cpu).irqs[irq];
+}
+#else
+extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
+#endif
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-static inline int kstat_irqs(int irq)
+static inline unsigned int kstat_irqs(unsigned int irq)
 {
-	int cpu, sum = 0;
+	unsigned int sum = 0;
+	int cpu;
 
 	for_each_possible_cpu(cpu)
-		sum += kstat_cpu(cpu).irqs[irq];
+		sum += kstat_irqs_cpu(irq, cpu);
 
 	return sum;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 76c225cf4b26..2aa3d4b2fce8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -312,14 +312,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
 	irqreturn_t action_ret;
-	const unsigned int cpu = smp_processor_id();
 
 	spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -351,7 +350,6 @@ out_unlock:
 void
 handle_level_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -361,7 +359,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/*
 	 * If its disabled or no action available
@@ -399,7 +397,6 @@ out_unlock:
 void
 handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -409,7 +406,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/*
 	 * If its disabled or no action available
@@ -458,8 +455,6 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-	const unsigned int cpu = smp_processor_id();
-
 	spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -476,7 +471,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		goto out_unlock;
 	}
 
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -531,7 +526,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9fc33b3378e6..1f346990f3f8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -37,7 +37,7 @@ void
 handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 	ack_bad_irq(irq);
 }
 
@@ -80,17 +80,38 @@ static void init_one_irq_desc(struct irq_desc *desc)
 #endif
 }
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static int nr_irq_desc = 32;
+extern int after_bootmem;
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
 
-static int __init parse_nr_irq_desc(char *arg)
+static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 {
-	if (arg)
-		nr_irq_desc = simple_strtoul(arg, NULL, 0);
-	return 0;
+	unsigned long bytes, total_bytes;
+	char *ptr;
+	int i;
+	unsigned long phys;
+
+	/* Compute how many bytes we need per irq and allocate them */
+	bytes = nr * sizeof(unsigned int);
+	total_bytes = bytes * nr_desc;
+	if (after_bootmem)
+		ptr = kzalloc(total_bytes, GFP_ATOMIC);
+	else
+		ptr = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+	if (!ptr)
+		panic(" can not allocate kstat_irqs\n");
+
+	phys = __pa(ptr);
+	printk(KERN_DEBUG "kstat_irqs ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+	for (i = 0; i < nr_desc; i++) {
+		desc[i].kstat_irqs = (unsigned int *)ptr;
+		ptr += bytes;
+	}
 }
 
-early_param("nr_irq_desc", parse_nr_irq_desc);
 
 static void __init init_work(void *data)
 {
@@ -100,25 +121,44 @@ static void __init init_work(void *data)
 
 	desc = *da->name;
 
-	for (i = 0; i < *da->nr; i++)
+	for (i = 0; i < *da->nr; i++) {
 		init_one_irq_desc(&desc[i]);
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+		desc[i].irq = i;
+#endif
+	}
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		desc[i-1].next = &desc[i];
+#endif
+
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static int nr_irq_desc = 32;
+
+static int __init parse_nr_irq_desc(char *arg)
+{
+	if (arg)
+		nr_irq_desc = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("nr_irq_desc", parse_nr_irq_desc);
+
 static struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
-extern int after_bootmem;
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-			     unsigned long align,
-			     unsigned long goal);
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
 	int i;
 	int count = 0;
+	unsigned long phys;
+	unsigned long total_bytes;
 
 	BUG_ON(irq == -1U);
 
@@ -141,38 +181,34 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	 */
 	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
 
+	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
 	if (after_bootmem)
-		desc = kzalloc(sizeof(struct irq_desc)*nr_irq_desc, GFP_ATOMIC);
+		desc = kzalloc(total_bytes, GFP_ATOMIC);
 	else
-		desc = __alloc_bootmem_nopanic(sizeof(struct irq_desc)*nr_irq_desc, PAGE_SIZE, 0);
+		desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
 	if (!desc)
 		panic("please boot with nr_irq_desc= %d\n", count * 2);
 
+	phys = __pa(desc);
+	printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
 	for (i = 0; i < nr_irq_desc; i++)
 		init_one_irq_desc(&desc[i]);
 
 	for (i = 1; i < nr_irq_desc; i++)
 		desc[i-1].next = &desc[i];
 
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
+
 	desc->irq = irq;
 	desc_pri->next = desc;
 
 	return desc;
 }
 #else
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	int i;
-	struct  irq_desc *desc;
-
-	desc = *da->name;
 
-	for (i = 0; i < *da->nr; i++)
-		init_one_irq_desc(&desc[i]);
-
-}
 static struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
@@ -315,7 +351,7 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_irqs_this_cpu(desc)++;
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -415,3 +451,10 @@ void early_init_irq_lock_class(void)
 }
 #endif
 
+unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->kstat_irqs[cpu];
+}
+EXPORT_SYMBOL(kstat_irqs_cpu);
+
diff --git a/kernel/sched.c b/kernel/sched.c
index b9d713781b5b..6f230596bd0c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4048,11 +4048,8 @@ static inline void idle_balance(int cpu, struct rq *rq)
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
-EXPORT_PER_CPU_SYMBOL(kstat);
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL);
-#endif
+EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
-- 
cgit v1.2.3-55-g7522


From 9059d8fa4a3a9153da53da890039f7f956cc9d19 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:10 -0700
Subject: irq: add irq_desc_without_new

add an irq_desc accessor that will not allocate any sparse entry
but returns failure if there's no entry present.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 13 +++++++++++++
 kernel/irq/handle.c | 28 ++++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index cbf471aee1ce..c9ffef7c3b44 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -191,10 +191,23 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *__irq_to_desc(unsigned int irq);
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+
 #ifndef CONFIG_HAVE_DYN_ARRAY
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
+#else
+extern struct irq_desc *irq_desc;
 #endif
+
+#else
+
+extern struct irq_desc *sparse_irqs;
+
+#endif
+
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 1f346990f3f8..8e55dbe50afc 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -112,7 +112,6 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -149,9 +148,27 @@ static int __init parse_nr_irq_desc(char *arg)
 
 early_param("nr_irq_desc", parse_nr_irq_desc);
 
-static struct irq_desc *sparse_irqs;
+struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
+struct irq_desc *__irq_to_desc(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	BUG_ON(irq == -1U);
+
+	desc = &sparse_irqs[0];
+	while (desc) {
+		if (desc->irq == irq)
+			return desc;
+
+		if (desc->irq == -1U)
+			return NULL;
+
+		desc = desc->next;
+	}
+	return NULL;
+}
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
@@ -208,8 +225,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return desc;
 }
 #else
-
-static struct irq_desc *irq_desc;
+struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
 #endif
@@ -239,6 +255,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	return NULL;
 }
+struct irq_desc *__irq_to_desc(unsigned int irq)
+{
+	return irq_to_desc(irq);
+}
 #endif
 
 /*
-- 
cgit v1.2.3-55-g7522


From 2c6927a38f65b53b62f86158fba29a068c4e8b6a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:11 -0700
Subject: irq: replace loop with nr_irqs with for_each_irq_desc

There are a handful of loops that go from 0 to nr_irqs and use
get_irq_desc() on them. These would allocate all the irq_desc
entries, regardless of the need for them.

Use the smarter for_each_irq_desc() iterator that will only iterate
over the present ones.

v2: make sure arch without GENERIC_HARDIRQS work too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c |  6 +++---
 arch/x86/kernel/irq_64.c     |  5 ++---
 arch/x86/kernel/irqinit_64.c | 17 +++++------------
 include/linux/irq.h          |  7 +++++++
 kernel/irq/internals.h       |  4 ++--
 kernel/irq/manage.c          |  2 +-
 kernel/irq/proc.c            | 10 +++++-----
 7 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 51ef7eb75f2e..708be9724daf 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1871,10 +1871,10 @@ unmask:
 
 static void ir_irq_migration(struct work_struct *work)
 {
-	int irq;
+	unsigned int irq;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
-		struct irq_desc *desc = irq_to_desc(irq);
+	for_each_irq_desc(irq, desc) {
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a0a4eb44dcb..b3cf55e325f5 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -224,17 +224,16 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
-		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
-		desc = irq_to_desc(irq);
 		/* interrupt's are disabled at this point */
 		spin_lock(&desc->lock);
 
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 0744b49b4d12..cd9f42d028d9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,25 +142,18 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < 16; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
 		desc->depth = 1;
 
-		if (i < 16) {
-			/*
-			 * 16 old-style INTA-cycle interrupts:
-			 */
-			set_irq_chip_and_handler_name(i, &i8259A_chip,
+		/*
+		 * 16 old-style INTA-cycle interrupts:
+		 */
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
 						      handle_level_irq, "XT");
-		} else {
-			/*
-			 * 'high' PCI IRQs filled in on demand
-			 */
-			desc->chip = &no_irq_chip;
-		}
 	}
 }
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c9ffef7c3b44..9de16ca8b8e5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,9 +202,16 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+#define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq])
+#endif
+
 #else
 
 extern struct irq_desc *sparse_irqs;
+#define for_each_irq_desc(irqX, desc)		\
+	for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U)
 
 #endif
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 422dd00c8bd3..c9767e641980 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -14,11 +14,11 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
 
 #ifdef CONFIG_PROC_FS
-extern void register_irq_proc(unsigned int irq);
+extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
 #else
-static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void register_handler_proc(unsigned int irq,
 					 struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b5943e9f95aa..5070f55fdc16 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -478,7 +478,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	new->irq = irq;
-	register_irq_proc(irq);
+	register_irq_proc(irq, desc);
 	new->dir = NULL;
 	register_handler_proc(irq, new);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c2f356c808f6..bc0993d86c8b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -182,11 +182,10 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 
 #define MAX_NAMELEN 10
 
-void register_irq_proc(unsigned int irq)
+void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
 	char name [MAX_NAMELEN];
 	struct proc_dir_entry *entry;
-	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
 		return;
@@ -230,7 +229,8 @@ void register_default_affinity_proc(void)
 
 void init_irq_proc(void)
 {
-	int i;
+	unsigned int irq;
+	struct irq_desc *desc;
 
 	/* create /proc/irq */
 	root_irq_dir = proc_mkdir("irq", NULL);
@@ -242,7 +242,7 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for (i = 0; i < nr_irqs; i++)
-		register_irq_proc(i);
+	for_each_irq_desc(irq, desc)
+		register_irq_proc(irq, desc);
 }
 
-- 
cgit v1.2.3-55-g7522


From c7fb03a475bd80c642c1345d85c7c550f63514b8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:12 -0700
Subject: irq, fs/proc: replace loop with nr_irqs for proc/stat

Replace another nr_irqs loop to avoid the allocation of all sparse
irq entries - use for_each_irq_desc instead.

v2: make sure arch without GENERIC_HARDIRQS works too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/proc_misc.c       | 42 ++++++++++++++++++++++++++++--------------
 include/linux/interrupt.h |  5 +++++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index aa069acf61a0..c3cbabe8b38e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -501,17 +502,16 @@ static const struct file_operations proc_vmalloc_operations = {
 
 static int show_stat(struct seq_file *p, void *v)
 {
-	int i;
+	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	cputime64_t guest;
 	u64 sum = 0;
 	struct timespec boottime;
-	unsigned int *per_irq_sum;
-
-	per_irq_sum = kzalloc(sizeof(unsigned int)*nr_irqs, GFP_KERNEL);
-	if (!per_irq_sum)
-		return -ENOMEM;
+	unsigned int per_irq_sum;
+#ifdef CONFIG_GENERIC_HARDIRQS
+	struct irq_desc *desc;
+#endif
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -520,8 +520,6 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		int j;
-
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -531,10 +529,12 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned int temp = kstat_irqs_cpu(j, i);
+		for_each_irq_desc(j, desc)
+		{
+			unsigned int temp;
+
+			temp = kstat_irqs_cpu(j, i);
 			sum += temp;
-			per_irq_sum[j] += temp;
 		}
 		sum += arch_irq_stat_cpu(i);
 	}
@@ -577,8 +577,23 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < nr_irqs; i++)
-		seq_printf(p, " %u", per_irq_sum[i]);
+	/* sum again ? it could be updated? */
+	for_each_irq_desc(j, desc)
+	{
+		per_irq_sum = 0;
+		for_each_possible_cpu(i) {
+			unsigned int temp;
+
+			temp = kstat_irqs_cpu(j, i);
+			per_irq_sum += temp;
+		}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+		seq_printf(p, " %u:%u", j, per_irq_sum);
+#else
+		seq_printf(p, " %u", per_irq_sum);
+#endif
+	}
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -592,7 +607,6 @@ static int show_stat(struct seq_file *p, void *v)
 		nr_running(),
 		nr_iowait());
 
-	kfree(per_irq_sum);
 	return 0;
 }
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 511803853a5b..d4039a0b23f4 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -17,6 +17,11 @@
 
 extern int nr_irqs;
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+#define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#endif
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
-- 
cgit v1.2.3-55-g7522


From 46b8214d12c274bd4265aae482ab7ffe69d94818 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:13 -0700
Subject: x86, ioapic: replace loop with nr_irqs with for_each_irq_icfg

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 708be9724daf..60d60061659c 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -129,6 +129,9 @@ static void __init init_work(void *data)
 		cfg[i-1].next = &cfg[i];
 }
 
+#define for_each_irq_cfg(cfg)		\
+	for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next)
+
 static struct irq_cfg *irq_cfgx;
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
@@ -1097,20 +1100,18 @@ void __setup_vector_irq(int cpu)
 	/* Initialize vector_irq on a new cpu */
 	/* This function must be called with vector_lock held */
 	int irq, vector;
+	struct irq_cfg *cfg;
 
 	/* Mark the inuse vectors */
-	for (irq = 0; irq < nr_irqs; ++irq) {
-		struct irq_cfg *cfg = irq_cfg(irq);
-
+	for_each_irq_cfg(cfg) {
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
+		irq = cfg->irq;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		struct irq_cfg *cfg;
-
 		irq = per_cpu(vector_irq, cpu)[vector];
 		if (irq < 0)
 			continue;
@@ -1340,6 +1341,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1408,12 +1410,11 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_cfg *cfg = irq_cfg(i);
+	for_each_irq_cfg(cfg) {
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
+		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -2070,6 +2071,7 @@ static inline void init_IO_APIC_traps(void)
 {
 	int irq;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -2082,10 +2084,8 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < nr_irqs ; irq++) {
-		struct irq_cfg *cfg;
-
-		cfg = irq_cfg(irq);
+	for_each_irq_cfg(cfg) {
+		irq = cfg->irq;
 		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
-- 
cgit v1.2.3-55-g7522


From 7d94f7ca401dd7f445fda9a971a48aa5427b3e55 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:14 -0700
Subject: irq: remove >= nr_irqs checking with config_have_sparse_irq

remove irq limit checks - nr_irqs is dynamic and we expand anytime.

v2: fix checking about result irq_cfg_without_new, so could use msi again
v3: use irq_desc_without_new to check irq is valid

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c |  9 --------
 arch/x86/kernel/irq_64.c     |  2 +-
 kernel/irq/chip.c            | 49 ++++++++++++++++++++++++--------------------
 kernel/irq/manage.c          | 43 +++++++++++++++++++++++---------------
 4 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 60d60061659c..1b8cccb5ba25 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -400,7 +400,6 @@ static inline void io_apic_sync(unsigned int apic)
 	struct irq_cfg *cfg;						\
 	struct irq_pin_list *entry;					\
 									\
-	BUG_ON(irq >= nr_irqs);						\
 	cfg = irq_cfg(irq);						\
 	entry = cfg->irq_2_pin;						\
 	for (;;) {							\
@@ -480,7 +479,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	BUG_ON(irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
@@ -549,7 +547,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	BUG_ON(irq >= nr_irqs);
 	/* first time to refer irq_cfg, so with new */
 	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
@@ -841,7 +838,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
-	BUG_ON(best_guess >= nr_irqs);
 	return best_guess;
 }
 
@@ -973,7 +969,6 @@ static int pin_2_irq(int idx, int apic, int pin)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
 	}
-	BUG_ON(irq >= nr_irqs);
 	return irq;
 }
 
@@ -1008,7 +1003,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	int cpu;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -1082,7 +1076,6 @@ static void __clear_irq_vector(int irq)
 	cpumask_t mask;
 	int cpu, vector;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
 	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
@@ -1924,8 +1917,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
-		if (irq >= nr_irqs)
-			continue;
 
 		desc = irq_to_desc(irq);
 		cfg = irq_cfg(irq);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index b3cf55e325f5..a3e36336d914 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -202,7 +202,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(irq < nr_irqs))
+	if (likely(__irq_to_desc(irq)))
 		generic_handle_irq(irq);
 	else {
 		if (!disable_apic)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 2aa3d4b2fce8..a4bb0da9c88c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,13 +27,13 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -60,12 +60,12 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -92,7 +92,8 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -121,12 +122,12 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
 
-	desc = irq_to_desc(irq);
 	if (type == IRQ_TYPE_NONE)
 		return 0;
 
@@ -149,13 +150,13 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -175,12 +176,13 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_to_desc(irq);
+
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -201,8 +203,14 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
-	if (irq >= nr_irqs || !desc->chip) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
+		printk(KERN_ERR
+		       "Trying to install chip data for IRQ%d\n", irq);
+		return -EINVAL;
+	}
+
+	if (!desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -546,14 +554,13 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	if (!handle)
 		handle = handle_bad_irq;
 	else if (desc->chip == &no_irq_chip) {
@@ -611,14 +618,13 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -629,14 +635,13 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs) {
+	desc = __irq_to_desc(irq);
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
 		return;
 	}
 
-	desc = irq_to_desc(irq);
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5070f55fdc16..c0b4d4df6de2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc = __irq_to_desc(irq);
 	unsigned int status;
 
-	if (irq >= nr_irqs)
+	if (!desc)
 		return;
 
 	do {
@@ -142,10 +142,11 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -171,9 +172,10 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	disable_irq_nosync(irq);
@@ -213,10 +215,11 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
@@ -290,10 +293,14 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	struct irqaction *action;
 
-	if (irq >= nr_irqs || desc->status & IRQ_NOREQUEST)
+	desc = __irq_to_desc(irq);
+	if (!desc)
+		return 0;
+
+	if (desc->status & IRQ_NOREQUEST)
 		return 0;
 
 	action = desc->action;
@@ -352,14 +359,15 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
 	int ret;
 
-	if (irq >= nr_irqs)
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return -EINVAL;
 
 	if (desc->chip == &no_irq_chip)
@@ -518,10 +526,11 @@ void free_irq(unsigned int irq, void *dev_id)
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
-	if (irq >= nr_irqs)
+
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return;
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	p = &desc->action;
 	for (;;) {
@@ -634,9 +643,11 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	 */
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
-	if (irq >= nr_irqs)
+
+	desc = __irq_to_desc(irq);
+	if (!desc)
 		return -EINVAL;
-	desc = irq_to_desc(irq);
+
 	if (desc->status & IRQ_NOREQUEST)
 		return -EINVAL;
 	if (!handler)
-- 
cgit v1.2.3-55-g7522


From 46926b67fc663d357a1a8174328998a9e49da0b8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:15 -0700
Subject: generic: add irq_desc in function in parameter

So we could remove some duplicated calling to irq_desc

v2: make sure irq_desc in  init/main.c is not used without generic_hardirqs

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c |  6 ++++--
 include/linux/irq.h      |  9 ++++++---
 init/main.c              |  7 +++++++
 kernel/irq/handle.c      | 30 ++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a3e36336d914..f58b995b30ee 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -189,6 +189,7 @@ u64 arch_irq_stat(void)
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
 
 	/* high bit used in ret_from_ code  */
 	unsigned vector = ~regs->orig_ax;
@@ -202,8 +203,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	if (likely(__irq_to_desc(irq)))
-		generic_handle_irq(irq);
+	desc = __irq_to_desc(irq);
+	if (likely(desc))
+		generic_handle_irq_desc(irq, desc);
 	else {
 		if (!disable_apic)
 			ack_APIC_irq();
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 9de16ca8b8e5..7b59e193a119 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -315,10 +315,8 @@ extern unsigned int __do_IRQ(unsigned int irq);
  * irqchip-style controller then we call the ->handle_irq() handler,
  * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
  */
-static inline void generic_handle_irq(unsigned int irq)
+static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
 #else
@@ -329,6 +327,11 @@ static inline void generic_handle_irq(unsigned int irq)
 #endif
 }
 
+static inline void generic_handle_irq(unsigned int irq)
+{
+	generic_handle_irq_desc(irq, irq_to_desc(irq));
+}
+
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
 			   int action_ret);
diff --git a/init/main.c b/init/main.c
index ab97d0877acc..0d2e60144f83 100644
--- a/init/main.c
+++ b/init/main.c
@@ -590,6 +590,13 @@ void pre_alloc_dyn_array(void)
 		if (da->init_work)
 			da->init_work(da);
 	}
+#else
+#ifdef CONFIF_GENERIC_HARDIRQS
+	unsigned int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		irq_desc[i].irq = i;
+#endif
 #endif
 }
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 8e55dbe50afc..e1d787e9169b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -197,6 +197,21 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	 *  we run out of pre-allocate ones, allocate more
 	 */
 	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
+	{
+		/* double check if some one mess up the list */
+		struct irq_desc *desc;
+		int count = 0;
+
+		desc = &sparse_irqs[0];
+		while (desc) {
+			printk(KERN_DEBUG "found irq_desc for irq %d\n", desc->irq);
+			if (desc->next)
+				printk(KERN_DEBUG "found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
+			desc = desc->next;
+			count++;
+		}
+		printk(KERN_DEBUG "all preallocted %d\n", count);
+	}
 
 	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
 	if (after_bootmem)
@@ -221,6 +236,21 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	desc->irq = irq;
 	desc_pri->next = desc;
+	{
+		/* double check if some one mess up the list */
+		struct irq_desc *desc;
+		int count = 0;
+
+		desc = &sparse_irqs[0];
+		while (desc) {
+			printk(KERN_DEBUG "1 found irq_desc for irq %d\n", desc->irq);
+			if (desc->next)
+				printk(KERN_DEBUG "1 found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
+			desc = desc->next;
+			count++;
+		}
+		printk(KERN_DEBUG "1 all preallocted %d\n", count);
+	}
 
 	return desc;
 }
-- 
cgit v1.2.3-55-g7522


From 1d5f6b36c4736af1dac396d6267eb53dcc8c0021 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:16 -0700
Subject: x86: check with without_new in show_interrupts

so we don't get new one that we don't need it.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f58b995b30ee..f337f87c1e16 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,7 +83,10 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
+		struct irq_desc *desc = __irq_to_desc(i);
+
+		if (!desc)
+			return 0;
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-- 
cgit v1.2.3-55-g7522


From cb5bc83225a86ca53bbb889ed8439e4fd6cf44ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:17 -0700
Subject: x86_64: rename irq_desc/irq_desc_alloc

change names:

          irq_desc() ==> irq_desc_alloc
	__irq_desc() ==> irq_desc

Also split a few of the uses in lowlevel x86 code.

v2: need to check if desc is null in smp_irq_move_cleanup

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 10 +++++++++-
 arch/x86/kernel/irq_64.c     |  4 ++--
 arch/x86/kernel/irqinit_64.c |  3 ++-
 include/linux/irq.h          |  2 +-
 kernel/irq/chip.c            | 21 +++++++++++----------
 kernel/irq/handle.c          | 23 +++++------------------
 kernel/irq/manage.c          | 16 ++++++++--------
 7 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 1b8cccb5ba25..a054db9ef190 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1124,7 +1124,12 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	if (irq < 16)
+		desc = irq_to_desc(irq);
+	else
+		desc = irq_to_desc_alloc(irq);
+
 	if (trigger)
 		desc->status |= IRQ_LEVEL;
 	else
@@ -1919,6 +1924,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		irq = __get_cpu_var(vector_irq)[vector];
 
 		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
 		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f337f87c1e16..5d5976e0311a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i < nr_irqs) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = __irq_to_desc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		if (!desc)
 			return 0;
@@ -206,7 +206,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 	stack_overflow_check(regs);
 #endif
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (likely(desc))
 		generic_handle_irq_desc(irq, desc);
 	else {
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index cd9f42d028d9..d17fbc26d96f 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,8 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < 16; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
+		/* first time call this irq_desc */
+		struct irq_desc *desc = irq_to_desc_alloc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7b59e193a119..5fe1b01c11fe 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -191,7 +191,7 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *__irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a4bb0da9c88c..9fc5e69213de 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -27,7 +27,8 @@ void dynamic_irq_init(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	desc = irq_to_desc_alloc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
@@ -60,7 +61,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
@@ -92,7 +93,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
@@ -122,7 +123,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
@@ -150,7 +151,7 @@ int set_irq_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
@@ -176,7 +177,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
@@ -203,7 +204,7 @@ int set_irq_chip_data(unsigned int irq, void *data)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install chip data for IRQ%d\n", irq);
@@ -554,7 +555,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
@@ -618,7 +619,7 @@ void __init set_irq_noprobe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
 
@@ -635,7 +636,7 @@ void __init set_irq_probe(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e1d787e9169b..d44e3515eae1 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -151,7 +151,7 @@ early_param("nr_irq_desc", parse_nr_irq_desc);
 struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
-struct irq_desc *__irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -169,7 +169,7 @@ struct irq_desc *__irq_to_desc(unsigned int irq)
 	}
 	return NULL;
 }
-struct irq_desc *irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
 	int i;
@@ -186,6 +186,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 		if (desc->irq == -1U) {
 			desc->irq = irq;
+			printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
 			return desc;
 		}
 		desc_pri = desc;
@@ -236,21 +237,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	desc->irq = irq;
 	desc_pri->next = desc;
-	{
-		/* double check if some one mess up the list */
-		struct irq_desc *desc;
-		int count = 0;
-
-		desc = &sparse_irqs[0];
-		while (desc) {
-			printk(KERN_DEBUG "1 found irq_desc for irq %d\n", desc->irq);
-			if (desc->next)
-				printk(KERN_DEBUG "1 found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
-			desc = desc->next;
-			count++;
-		}
-		printk(KERN_DEBUG "1 all preallocted %d\n", count);
-	}
+	printk(KERN_DEBUG "1 found new irq_desc for irq %d and pri will be irq %d\n", desc->irq, desc_pri->irq);
 
 	return desc;
 }
@@ -285,7 +272,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 
 	return NULL;
 }
-struct irq_desc *__irq_to_desc(unsigned int irq)
+struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	return irq_to_desc(irq);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c0b4d4df6de2..6df49218632a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,7 +31,7 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-	struct irq_desc *desc = __irq_to_desc(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned int status;
 
 	if (!desc)
@@ -145,7 +145,7 @@ void disable_irq_nosync(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -174,7 +174,7 @@ void disable_irq(unsigned int irq)
 {
 	struct irq_desc *desc;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -218,7 +218,7 @@ void enable_irq(unsigned int irq)
 	struct irq_desc *desc;
 	unsigned long flags;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -296,7 +296,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
 	struct irq_desc *desc;
 	struct irqaction *action;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return 0;
 
@@ -366,7 +366,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	int shared = 0;
 	int ret;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
@@ -527,7 +527,7 @@ void free_irq(unsigned int irq, void *dev_id)
 
 	WARN_ON(in_interrupt());
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -644,7 +644,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	if ((irqflags & IRQF_SHARED) && !dev_id)
 		return -EINVAL;
 
-	desc = __irq_to_desc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
-- 
cgit v1.2.3-55-g7522


From 67fb283e148e9bd761f73691d3173b6eab9ba8db Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:18 -0700
Subject: irq: separate sparse_irqs from sparse_irqs_free

so later don't need compare with -1U

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h |   2 +-
 kernel/irq/handle.c | 115 ++++++++++++++++++++++++++++------------------------
 2 files changed, 62 insertions(+), 55 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5fe1b01c11fe..d5749852ee69 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -211,7 +211,7 @@ extern struct irq_desc *irq_desc;
 
 extern struct irq_desc *sparse_irqs;
 #define for_each_irq_desc(irqX, desc)		\
-	for (desc = sparse_irqs, irqX = desc->irq; desc && irqX != -1U; desc = desc->next, irqX = desc ? desc->irq : -1U)
+	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
 
 #endif
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d44e3515eae1..6d174390f3a0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -112,6 +112,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static struct irq_desc *sparse_irqs_free;
+struct irq_desc *sparse_irqs;
+#endif
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -127,13 +132,16 @@ static void __init init_work(void *data)
 #endif
 	}
 
+	/* init kstat_irqs, nr_cpu_ids is ready already */
+	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		desc[i-1].next = &desc[i];
-#endif
 
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
+	sparse_irqs_free = sparse_irqs;
+	sparse_irqs = NULL;
+#endif
 }
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
@@ -148,23 +156,17 @@ static int __init parse_nr_irq_desc(char *arg)
 
 early_param("nr_irq_desc", parse_nr_irq_desc);
 
-struct irq_desc *sparse_irqs;
 DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
 
 struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	struct irq_desc *desc;
 
-	BUG_ON(irq == -1U);
-
-	desc = &sparse_irqs[0];
+	desc = sparse_irqs;
 	while (desc) {
 		if (desc->irq == irq)
 			return desc;
 
-		if (desc->irq == -1U)
-			return NULL;
-
 		desc = desc->next;
 	}
 	return NULL;
@@ -174,21 +176,12 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	struct irq_desc *desc, *desc_pri;
 	int i;
 	int count = 0;
-	unsigned long phys;
-	unsigned long total_bytes;
 
-	BUG_ON(irq == -1U);
-
-	desc_pri = desc = &sparse_irqs[0];
+	desc_pri = desc = sparse_irqs;
 	while (desc) {
 		if (desc->irq == irq)
 			return desc;
 
-		if (desc->irq == -1U) {
-			desc->irq = irq;
-			printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
-			return desc;
-		}
 		desc_pri = desc;
 		desc = desc->next;
 		count++;
@@ -197,48 +190,62 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	/*
 	 *  we run out of pre-allocate ones, allocate more
 	 */
-	printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
-	{
-		/* double check if some one mess up the list */
-		struct irq_desc *desc;
-		int count = 0;
-
-		desc = &sparse_irqs[0];
-		while (desc) {
-			printk(KERN_DEBUG "found irq_desc for irq %d\n", desc->irq);
-			if (desc->next)
-				printk(KERN_DEBUG "found irq_desc for irq %d and next will be irq %d\n", desc->irq, desc->next->irq);
-			desc = desc->next;
-			count++;
-		}
-		printk(KERN_DEBUG "all preallocted %d\n", count);
-	}
+	if (!sparse_irqs_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
 
-	total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
-	if (after_bootmem)
-		desc = kzalloc(total_bytes, GFP_ATOMIC);
-	else
-		desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+		printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
 
-	if (!desc)
-		panic("please boot with nr_irq_desc= %d\n", count * 2);
+		total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
+		if (after_bootmem)
+			desc = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
-	phys = __pa(desc);
-	printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+		if (!desc)
+			panic("please boot with nr_irq_desc= %d\n", count * 2);
 
-	for (i = 0; i < nr_irq_desc; i++)
-		init_one_irq_desc(&desc[i]);
+		phys = __pa(desc);
+		printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
-	for (i = 1; i < nr_irq_desc; i++)
-		desc[i-1].next = &desc[i];
+		for (i = 0; i < nr_irq_desc; i++)
+			init_one_irq_desc(&desc[i]);
 
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
+		for (i = 1; i < nr_irq_desc; i++)
+			desc[i-1].next = &desc[i];
 
-	desc->irq = irq;
-	desc_pri->next = desc;
-	printk(KERN_DEBUG "1 found new irq_desc for irq %d and pri will be irq %d\n", desc->irq, desc_pri->irq);
+		/* init kstat_irqs, nr_cpu_ids is ready already */
+		init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
 
+		sparse_irqs_free = desc;
+	}
+
+	desc = sparse_irqs_free;
+	sparse_irqs_free = sparse_irqs_free->next;
+	desc->next = NULL;
+	if (desc_pri)
+		desc_pri->next = desc;
+	else
+		sparse_irqs = desc;
+	desc->irq = irq;
+	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_desc *desc;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_desc);
+		unsigned int irqx;
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_desc dump after get that for %d\n", irq);
+		for_each_irq_desc(irqx, desc) {
+			phys = __pa(desc);
+			printk(KERN_DEBUG "irq_desc %d ==> [%#lx - %#lx]\n", irqx, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
 	return desc;
 }
 #else
-- 
cgit v1.2.3-55-g7522


From a2f9f43858db64cb8b45c4f6746d7a52b80d4dcb Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:19 -0700
Subject: x86_64: separate irq_cfgx from irq_cfgx_free

so later don't need to compare with -1U

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 92 ++++++++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index a054db9ef190..8ab7ae01773f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -111,44 +111,44 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
 }
 
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
 	struct irq_cfg *cfg;
+	int legacy_count;
 	int i;
 
 	cfg = *da->name;
 
 	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-	i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-	for (; i < *da->nr; i++)
+	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
 	for (i = 1; i < *da->nr; i++)
 		cfg[i-1].next = &cfg[i];
+
+	irq_cfgx_free = &irq_cfgx[legacy_count];
+	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
 #define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next)
+	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
 
-static struct irq_cfg *irq_cfgx;
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg;
 
-	BUG_ON(irq == -1U);
-
-	cfg = &irq_cfgx[0];
+	cfg = irq_cfgx;
 	while (cfg) {
 		if (cfg->irq == irq)
 			return cfg;
 
-		if (cfg->irq == -1U)
-			return NULL;
-
 		cfg = cfg->next;
 	}
 
@@ -161,44 +161,70 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	int i;
 	int count = 0;
 
-	BUG_ON(irq == -1U);
-
-	cfg_pri = cfg = &irq_cfgx[0];
+	cfg_pri = cfg = irq_cfgx;
 	while (cfg) {
 		if (cfg->irq == irq)
 			return cfg;
 
-		if (cfg->irq == -1U) {
-			cfg->irq = irq;
-			return cfg;
-		}
 		cfg_pri = cfg;
 		cfg = cfg->next;
 		count++;
 	}
 
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+	if (!irq_cfgx_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
 
-	if (after_bootmem)
-		cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
-	else
-		cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
+		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+		if (after_bootmem)
+			cfg = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
 
-	if (!cfg)
-		panic("please boot with nr_irq_cfg= %d\n", count * 2);
+		if (!cfg)
+			panic("please boot with nr_irq_cfg= %d\n", count * 2);
 
-	for (i = 0; i < nr_irq_cfg; i++)
-		init_one_irq_cfg(&cfg[i]);
+		phys = __pa(cfg);
+		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
-	for (i = 1; i < nr_irq_cfg; i++)
-		cfg[i-1].next = &cfg[i];
+		for (i = 0; i < nr_irq_cfg; i++)
+			init_one_irq_cfg(&cfg[i]);
 
-	cfg->irq = irq;
-	cfg_pri->next = cfg;
+		for (i = 1; i < nr_irq_cfg; i++)
+			cfg[i-1].next = &cfg[i];
+
+		irq_cfgx_free = cfg;
+	}
 
+	cfg = irq_cfgx_free;
+	irq_cfgx_free = irq_cfgx_free->next;
+	cfg->next = NULL;
+	if (cfg_pri)
+		cfg_pri->next = cfg;
+	else
+		irq_cfgx = cfg;
+	cfg->irq = irq;
+	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_cfg *cfg;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_cfg);
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+		for_each_irq_cfg(cfg) {
+			phys = __pa(cfg);
+			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
 	return cfg;
 }
 
-- 
cgit v1.2.3-55-g7522


From 52b17329d6d0a4824b89206803a430915031ff23 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:20 -0700
Subject: x86_64: make /proc/interrupts work with dyn irq_desc

loop with irq_desc list

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 31 ++++++++++++++++++++++++-------
 fs/proc/proc_misc.c      | 28 ++++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 5d5976e0311a..7bd841a9c640 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -70,9 +70,27 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i = *(loff_t *) v, j;
+	int i, j;
 	struct irqaction * action;
 	unsigned long flags;
+	unsigned int entries;
+	struct irq_desc *desc;
+	int tail = 0;
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	desc = (struct irq_desc *)v;
+	entries = -1U;
+	i = desc->irq;
+	if (!desc->next)
+		tail = 1;
+#else
+	entries = nr_irqs - 1;
+	i = *(loff_t *) v;
+	if (i == nr_irqs)
+		tail = 1;
+	else
+		desc = irq_to_desc(i);
+#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
@@ -81,12 +99,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < nr_irqs) {
+	if (i <= entries) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
-
-		if (!desc)
-			return 0;
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
@@ -116,7 +130,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == nr_irqs) {
+	}
+
+	if (tail) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
@@ -155,6 +171,7 @@ skip:
 		seq_printf(p, "  Spurious interrupts\n");
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 	}
+
 	return 0;
 }
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c3cbabe8b38e..72dd739a7f8a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -645,15 +645,36 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc *desc;
+	int irq;
+	int count = *pos;
+
+	for_each_irq_desc(irq, desc) {
+		if (count-- == 0)
+			return desc;
+	}
+
+	return NULL;
+#else
 	return (*pos <= nr_irqs) ? pos : NULL;
+#endif
 }
 
+
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	struct irq_desc *desc;
+
+	desc = ((struct irq_desc *)v)->next;
 	(*pos)++;
-	if (*pos > nr_irqs)
-		return NULL;
-	return pos;
+
+	return desc;
+#else
+	(*pos)++;
+	return (*pos <= nr_irqs) ? pos : NULL;
+#endif
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
@@ -661,7 +682,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
 	/* Nothing to do */
 }
 
-
 static const struct seq_operations int_seq_ops = {
 	.start = int_seq_start,
 	.next  = int_seq_next,
-- 
cgit v1.2.3-55-g7522


From e420dfb40c453a9760b86c7f338052bdb4dfa755 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:21 -0700
Subject: x86: put irq_2_iommu pointer into irq_desc

when CONFIG_HAVE_SPARSE_IRQ
preallocate some irq_2_iommu entries, and use get_one_free_irq_2_iomm to
get new one and link to irq_desc if needed.

else will use dyn_array or static array.

v2: <= nr_irqs fix

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 213 +++++++++++++++++++++++++++++++++----------
 include/linux/irq.h          |   4 +
 2 files changed, 169 insertions(+), 48 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 6961be807684..23372c811159 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,41 +19,136 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_DYNA_ARRAY
-static struct irq_2_iommu *irq_2_iommu;
-DEFINE_DYN_ARRAY(irq_2_iommu, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+static struct irq_2_iommu *irq_2_iommuX;
+/* fill one page ? */
+static int nr_irq_2_iommu = 0x100;
+static int irq_2_iommu_index;
+DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irq_2_iommu, PAGE_SIZE, NULL);
+
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+				     unsigned long align,
+				     unsigned long goal);
+
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int not_used)
+{
+	struct irq_2_iommu *iommu;
+	unsigned long total_bytes;
+
+	if (irq_2_iommu_index >= nr_irq_2_iommu) {
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_2_iommu %d\n", nr_irq_2_iommu);
+
+		total_bytes = sizeof(struct irq_2_iommu)*nr_irq_2_iommu;
+
+		if (after_bootmem)
+			iommu = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			iommu = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+		if (!iommu)
+			panic("can not get more irq_2_iommu\n");
+
+		irq_2_iommuX = iommu;
+		irq_2_iommu_index = 0;
+	}
+
+	iommu = &irq_2_iommuX[irq_2_iommu_index];
+	irq_2_iommu_index++;
+	return iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	BUG_ON(!desc);
+
+	return desc->irq_2_iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	struct irq_desc *desc;
+	struct irq_2_iommu *irq_iommu;
+
+	desc = irq_to_desc(irq);
+
+	BUG_ON(!desc);
+
+	irq_iommu = desc->irq_2_iommu;
+
+	if (!irq_iommu)
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(irq);
+
+	return desc->irq_2_iommu;
+}
+
+#else /* !CONFIG_HAVE_SPARSE_IRQ */
+
+#ifdef CONFIG_HAVE_DYN_ARRAY
+static struct irq_2_iommu *irq_2_iommuX;
+DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
 #else
-static struct irq_2_iommu irq_2_iommu[NR_IRQS];
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+#endif
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_2_iommuX[irq];
+
+	return NULL;
+}
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	return irq_2_iommu(irq);
+}
 #endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-int irq_remapped(int irq)
+static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
 {
-	if (irq > nr_irqs)
-		return 0;
+	struct irq_2_iommu *irq_iommu;
+
+	irq_iommu = irq_2_iommu(irq);
 
-	if (!irq_2_iommu[irq].iommu)
-		return 0;
+	if (!irq_iommu)
+		return NULL;
 
-	return 1;
+	if (!irq_iommu->iommu)
+		return NULL;
+
+	return irq_iommu;
+}
+
+int irq_remapped(int irq)
+{
+	return valid_irq_2_iommu(irq) != NULL;
 }
 
 int get_irte(int irq, struct irte *entry)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
-	if (!entry || irq > nr_irqs)
+	if (!entry)
 		return -1;
 
 	spin_lock(&irq_2_ir_lock);
-	if (!irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
-	*entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
+	*entry = *(irq_iommu->iommu->ir_table->base + index);
 
 	spin_unlock(&irq_2_ir_lock);
 	return 0;
@@ -62,6 +157,7 @@ int get_irte(int irq, struct irte *entry)
 int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
+	struct irq_2_iommu *irq_iommu;
 	u16 index, start_index;
 	unsigned int mask = 0;
 	int i;
@@ -69,6 +165,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
+#ifndef	CONFIG_HAVE_SPARSE_IRQ
+	/* protect irq_2_iommu_alloc later */
+	if (irq >= nr_irqs)
+		return -1;
+#endif
+
 	/*
 	 * start the IRTE search from index 0.
 	 */
@@ -108,10 +210,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	for (i = index; i < index + count; i++)
 		table->base[i].present = 1;
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index =  index;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = mask;
+	irq_iommu = irq_2_iommu_alloc(irq);
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index =  index;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = mask;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -132,31 +235,36 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 {
 	int index;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	*sub_handle = irq_2_iommu[irq].sub_handle;
-	index = irq_2_iommu[irq].irte_index;
+	*sub_handle = irq_iommu->sub_handle;
+	index = irq_iommu->irte_index;
 	spin_unlock(&irq_2_ir_lock);
 	return index;
 }
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	irq_2_iommu[irq].iommu = iommu;
-	irq_2_iommu[irq].irte_index = index;
-	irq_2_iommu[irq].sub_handle = subhandle;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = iommu;
+	irq_iommu->irte_index = index;
+	irq_iommu->sub_handle = subhandle;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -165,16 +273,19 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
+	struct irq_2_iommu *irq_iommu;
+
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_2_iommu(irq)->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
@@ -186,16 +297,18 @@ int modify_irte(int irq, struct irte *irte_modified)
 	int index;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
 	set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
@@ -211,18 +324,20 @@ int flush_irte(int irq)
 {
 	int index;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 
-	qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+	qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	spin_unlock(&irq_2_ir_lock);
 
 	return 0;
@@ -254,28 +369,30 @@ int free_irte(int irq)
 	int index, i;
 	struct irte *irte;
 	struct intel_iommu *iommu;
+	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	if (irq >= nr_irqs || !irq_2_iommu[irq].iommu) {
+	irq_iommu = valid_irq_2_iommu(irq);
+	if (!irq_iommu) {
 		spin_unlock(&irq_2_ir_lock);
 		return -1;
 	}
 
-	iommu = irq_2_iommu[irq].iommu;
+	iommu = irq_iommu->iommu;
 
-	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-	if (!irq_2_iommu[irq].sub_handle) {
-		for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+	if (!irq_iommu->sub_handle) {
+		for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
 			set_64bit((unsigned long *)irte, 0);
-		qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+		qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	}
 
-	irq_2_iommu[irq].iommu = NULL;
-	irq_2_iommu[irq].irte_index = 0;
-	irq_2_iommu[irq].sub_handle = 0;
-	irq_2_iommu[irq].irte_mask = 0;
+	irq_iommu->iommu = NULL;
+	irq_iommu->irte_index = 0;
+	irq_iommu->sub_handle = 0;
+	irq_iommu->irte_mask = 0;
 
 	spin_unlock(&irq_2_ir_lock);
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d5749852ee69..788d5a35a580 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -128,6 +128,7 @@ struct irq_chip {
 };
 
 struct timer_rand_state;
+struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -162,6 +163,9 @@ struct irq_desc {
 	unsigned int            *kstat_irqs;
 #else
 	unsigned int            kstat_irqs[NR_CPUS];
+#endif
+#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
+       struct irq_2_iommu      *irq_2_iommu;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
-- 
cgit v1.2.3-55-g7522


From 6d50bc26836e16a9589e0b128d527c29e30d722a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:22 -0700
Subject: x86: use 28 bits irq NR for pci msi/msix and ht

also print out irq no in /proc/interrups and /proc/stat in hex, so could
tell bus/dev/func.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 64 ++++++++++++++++++++++++++++++++++----------
 arch/x86/kernel/irq_64.c     |  2 +-
 drivers/pci/htirq.c          | 22 +++++++++++++--
 fs/proc/proc_misc.c          |  2 +-
 include/linux/irq.h          |  1 +
 5 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8ab7ae01773f..b0d4abc55a11 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -2520,17 +2520,21 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	int irq;
-	int new;
+	unsigned int irq;
+	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-	irq = -ENOSPC;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+	irq_want = nr_irqs - 1;
+#endif
+
+	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new >= 0; new--) {
+	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2545,12 +2549,24 @@ int create_irq(void)
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq >= 0) {
+	if (irq > 0) {
 		dynamic_irq_init(irq);
 	}
 	return irq;
 }
 
+int create_irq(void)
+{
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
+}
+
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -2803,13 +2819,29 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	return 0;
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	int irq, ret;
+	unsigned int irq;
+	int ret;
+	unsigned int irq_want;
 
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+	if (irq == 0)
+		return -1;
 
 #ifdef CONFIG_INTR_REMAP
 	if (!intr_remapping_enabled)
@@ -2836,18 +2868,22 @@ error:
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	int irq, ret, sub_handle;
+	unsigned int irq;
+	int ret, sub_handle;
 	struct msi_desc *desc;
+	unsigned int irq_want;
+
 #ifdef CONFIG_INTR_REMAP
 	struct intel_iommu *iommu = 0;
 	int index = 0;
 #endif
 
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
 	sub_handle = 0;
 	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq();
-		if (irq < 0)
-			return irq;
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
 #ifdef CONFIG_INTR_REMAP
 		if (!intr_remapping_enabled)
 			goto no_ir;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 7bd841a9c640..348a11168c2b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%3d: ",i);
+		seq_printf(p, "%#x: ",i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 279c940a0039..7c5aef13fcdb 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -82,6 +82,18 @@ void unmask_ht_irq(unsigned int irq)
 	write_ht_irq_msg(irq, &msg);
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 /**
  * __ht_create_irq - create an irq and attach it to a device.
  * @dev: The hypertransport device to find the irq capability on.
@@ -97,7 +109,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	u32 data;
 	int max_irq;
 	int pos;
-	int irq;
+	unsigned int irq;
+	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -125,8 +138,13 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
+	irq_want= build_irq_for_pci_dev(dev);
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	irq = create_irq_nr(irq_want + idx);
+#else
 	irq = create_irq();
-	if (irq < 0) {
+#endif
+	if (irq == 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 72dd739a7f8a..d68c3592fe4a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -589,7 +589,7 @@ static int show_stat(struct seq_file *p, void *v)
 		}
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
-		seq_printf(p, " %u:%u", j, per_irq_sum);
+		seq_printf(p, " %#x:%u", j, per_irq_sum);
 #else
 		seq_printf(p, " %u", per_irq_sum);
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 788d5a35a580..704136138dc7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -399,6 +399,7 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
+extern unsigned int create_irq_nr(unsigned int irq_want);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
-- 
cgit v1.2.3-55-g7522


From 8b8e8c1bf7275eca859fe551dfa484134eaf013b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:23 -0700
Subject: x86: remove irqbalance in kernel for 32 bit

This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmai.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                |   8 -
 arch/x86/configs/i386_defconfig |   1 -
 arch/x86/kernel/io_apic_32.c    | 402 ----------------------------------------
 arch/x86/kernel/quirks.c        |   3 -
 include/linux/irq.h             |  14 +-
 kernel/irq/manage.c             |   3 -
 6 files changed, 3 insertions(+), 428 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1004888e9b13..3e0eaaa1a339 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1254,14 +1254,6 @@ config EFI
   	resultant kernel should continue to boot on existing non-EFI
   	platforms.
 
-config IRQBALANCE
-	def_bool y
-	prompt "Enable kernel irq balancing"
-	depends on X86_32 && SMP && X86_IO_APIC
-	help
-	  The default yes will allow the kernel to do irq load balancing.
-	  Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 204884b1415a..668edf226067 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>	/* kernel_thread() */
-# include <linux/kernel_stat.h>	/* kstat */
-# include <linux/slab.h>		/* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
-#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
-#define BALANCED_IRQ_LESS_DELTA		(HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-	unsigned long *last_irq;
-	unsigned long *irq_delta;
-	unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) 	(irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
-
-static cpumask_t *balance_irq_affinity;
-
-
-static void __init irq_affinity_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	int i;
-	struct  balance_irq_affinity *affinity;
-
-	affinity = *da->name;
-
-	for (i = 0; i < *da->nr; i++)
-		memcpy(&affinity[i], &balance_irq_affinity_init,
-			 sizeof(struct balance_irq_affinity));
-
-}
-
-DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
-
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-			unsigned long now, int direction)
-{
-	int search_idle = 1;
-	int cpu = curr_cpu;
-
-	goto inside;
-
-	do {
-		if (unlikely(cpu == curr_cpu))
-			search_idle = 0;
-inside:
-		if (direction == 1) {
-			cpu++;
-			if (cpu >= NR_CPUS)
-				cpu = 0;
-		} else {
-			cpu--;
-			if (cpu == -1)
-				cpu = NR_CPUS-1;
-		}
-	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-			(search_idle && !IDLE_ENOUGH(cpu, now)));
-
-	return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-	unsigned long now = jiffies;
-	cpumask_t allowed_mask;
-	unsigned int new_cpu;
-
-	if (irqbalance_disabled)
-		return;
-
-	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-	new_cpu = move(cpu, allowed_mask, now, 1);
-	if (cpu != new_cpu)
-		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-	int i, j;
-	struct irq_desc *desc;
-
-	for_each_online_cpu(i) {
-		for (j = 0; j < nr_irqs; j++) {
-			desc = irq_to_desc(j);
-			if (!desc->action)
-				continue;
-			/* Is it a significant load ?  */
-			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-						useful_load_threshold)
-				continue;
-			balance_irq(i, j);
-		}
-	}
-	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-	return;
-}
-
-static void do_irq_balance(void)
-{
-	int i, j;
-	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-	unsigned long move_this_load = 0;
-	int max_loaded = 0, min_loaded = 0;
-	int load;
-	unsigned long useful_load_threshold = balanced_irq_interval + 10;
-	int selected_irq;
-	int tmp_loaded, first_attempt = 1;
-	unsigned long tmp_cpu_irq;
-	unsigned long imbalance = 0;
-	cpumask_t allowed_mask, target_cpu_mask, tmp;
-	struct irq_desc *desc;
-
-	for_each_possible_cpu(i) {
-		int package_index;
-		CPU_IRQ(i) = 0;
-		if (!cpu_online(i))
-			continue;
-		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned long value_now, delta;
-			/* Is this an active IRQ or balancing disabled ? */
-			desc = irq_to_desc(j);
-			if (!desc->action || irq_balancing_disabled(j))
-				continue;
-			if (package_index == i)
-				IRQ_DELTA(package_index, j) = 0;
-			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_irqs_cpu(j, i);
-
-			/* Determine the activity per processor per IRQ */
-			delta = value_now - LAST_CPU_IRQ(i, j);
-
-			/* Update last_cpu_irq[][] for the next time */
-			LAST_CPU_IRQ(i, j) = value_now;
-
-			/* Ignore IRQs whose rate is less than the clock */
-			if (delta < useful_load_threshold)
-				continue;
-			/* update the load for the processor or package total */
-			IRQ_DELTA(package_index, j) += delta;
-
-			/* Keep track of the higher numbered sibling as well */
-			if (i != package_index)
-				CPU_IRQ(i) += delta;
-			/*
-			 * We have sibling A and sibling B in the package
-			 *
-			 * cpu_irq[A] = load for cpu A + load for cpu B
-			 * cpu_irq[B] = load for cpu B
-			 */
-			CPU_IRQ(package_index) += delta;
-		}
-	}
-	/* Find the least loaded processor package */
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (min_cpu_irq > CPU_IRQ(i)) {
-			min_cpu_irq = CPU_IRQ(i);
-			min_loaded = i;
-		}
-	}
-	max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-	/*
-	 * Look for heaviest loaded processor.
-	 * We may come back to get the next heaviest loaded processor.
-	 * Skip processors with trivial loads.
-	 */
-	tmp_cpu_irq = 0;
-	tmp_loaded = -1;
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (max_cpu_irq <= CPU_IRQ(i))
-			continue;
-		if (tmp_cpu_irq < CPU_IRQ(i)) {
-			tmp_cpu_irq = CPU_IRQ(i);
-			tmp_loaded = i;
-		}
-	}
-
-	if (tmp_loaded == -1) {
-	 /*
-	  * In the case of small number of heavy interrupt sources,
-	  * loading some of the cpus too much. We use Ingo's original
-	  * approach to rotate them around.
-	  */
-		if (!first_attempt && imbalance >= useful_load_threshold) {
-			rotate_irqs_among_cpus(useful_load_threshold);
-			return;
-		}
-		goto not_worth_the_effort;
-	}
-
-	first_attempt = 0;		/* heaviest search */
-	max_cpu_irq = tmp_cpu_irq;	/* load */
-	max_loaded = tmp_loaded;	/* processor */
-	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-	/*
-	 * if imbalance is less than approx 10% of max load, then
-	 * observe diminishing returns action. - quit
-	 */
-	if (imbalance < (max_cpu_irq >> 3))
-		goto not_worth_the_effort;
-
-tryanotherirq:
-	/* if we select an IRQ to move that can't go where we want, then
-	 * see if there is another one to try.
-	 */
-	move_this_load = 0;
-	selected_irq = -1;
-	for (j = 0; j < nr_irqs; j++) {
-		/* Is this an active IRQ? */
-		desc = irq_to_desc(j);
-		if (!desc->action)
-			continue;
-		if (imbalance <= IRQ_DELTA(max_loaded, j))
-			continue;
-		/* Try to find the IRQ that is closest to the imbalance
-		 * without going over.
-		 */
-		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-			move_this_load = IRQ_DELTA(max_loaded, j);
-			selected_irq = j;
-		}
-	}
-	if (selected_irq == -1)
-		goto tryanothercpu;
-
-	imbalance = move_this_load;
-
-	/* For physical_balance case, we accumulated both load
-	 * values in the one of the siblings cpu_irq[],
-	 * to use the same code for physical and logical processors
-	 * as much as possible.
-	 *
-	 * NOTE: the cpu_irq[] array holds the sum of the load for
-	 * sibling A and sibling B in the slot for the lowest numbered
-	 * sibling (A), _AND_ the load for sibling B in the slot for
-	 * the higher numbered sibling.
-	 *
-	 * We seek the least loaded sibling by making the comparison
-	 * (A+B)/2 vs B
-	 */
-	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-		if (load > CPU_IRQ(j)) {
-			/* This won't change cpu_sibling_map[min_loaded] */
-			load = CPU_IRQ(j);
-			min_loaded = j;
-		}
-	}
-
-	cpus_and(allowed_mask,
-		cpu_online_map,
-		balance_irq_affinity[selected_irq]);
-	target_cpu_mask = cpumask_of_cpu(min_loaded);
-	cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-	if (!cpus_empty(tmp)) {
-		/* mark for change destination */
-		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-		/* Since we made a change, come back sooner to
-		 * check for more variation.
-		 */
-		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-		return;
-	}
-	goto tryanotherirq;
-
-not_worth_the_effort:
-	/*
-	 * if we did not find an IRQ to move, then adjust the time interval
-	 * upward
-	 */
-	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-	return;
-}
-
-static int balanced_irq(void *unused)
-{
-	int i;
-	unsigned long prev_balance_time = jiffies;
-	long time_remaining = balanced_irq_interval;
-	struct irq_desc *desc;
-
-	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < nr_irqs ; i++) {
-		desc = irq_to_desc(i);
-		desc->pending_mask = cpumask_of_cpu(0);
-		set_pending_irq(i, cpumask_of_cpu(0));
-	}
-
-	set_freezable();
-	for ( ; ; ) {
-		time_remaining = schedule_timeout_interruptible(time_remaining);
-		try_to_freeze();
-		if (time_after(jiffies,
-				prev_balance_time+balanced_irq_interval)) {
-			preempt_disable();
-			do_irq_balance();
-			prev_balance_time = jiffies;
-			time_remaining = balanced_irq_interval;
-			preempt_enable();
-		}
-	}
-	return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-	cpumask_t tmp;
-
-	cpus_shift_right(tmp, cpu_online_map, 2);
-	c = &boot_cpu_data;
-	/* When not overwritten by the command line ask subarchitecture. */
-	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-		irqbalance_disabled = NO_BALANCE_IRQ;
-	if (irqbalance_disabled)
-		return 0;
-
-	 /* disable irqbalance completely if there is only one processor online */
-	if (num_online_cpus() < 2) {
-		irqbalance_disabled = 1;
-		return 0;
-	}
-	/*
-	 * Enable physical balance only if more than 1 physical processor
-	 * is present
-	 */
-	if (smp_num_siblings > 1 && !cpus_empty(tmp))
-		physical_balance = 1;
-
-	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-			printk(KERN_ERR "balanced_irq_init: out of memory");
-			goto failed;
-		}
-	}
-
-	printk(KERN_INFO "Starting balanced_irq\n");
-	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-		return 0;
-	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-	for_each_possible_cpu(i) {
-		kfree(irq_cpu_data[i].irq_delta);
-		irq_cpu_data[i].irq_delta = NULL;
-		kfree(irq_cpu_data[i].last_irq);
-		irq_cpu_data[i].last_irq = NULL;
-	}
-	return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-	irqbalance_disabled = 1;
-	return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
 			"disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-		irqbalance_disable("");
-#endif
 		noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
 		no_irq_affinity = 1;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 704136138dc7..2445d2b3d5dc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -185,7 +185,7 @@ struct irq_desc {
 	cpumask_t		affinity;
 	unsigned int		cpu;
 #endif
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_t		pending_mask;
 #endif
 #ifdef CONFIG_PROC_FS
@@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_SMP
 
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 
 void set_pending_irq(unsigned int irq, cpumask_t mask);
 void move_native_irq(int irq);
 void move_masked_irq(int irq);
 
-#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
+#else /* CONFIG_GENERIC_PENDING_IRQ */
 
 static inline void move_irq(int irq)
 {
@@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_IRQBALANCE
-extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
-#else
-static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-}
-#endif
-
 extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 6df49218632a..ddc956861a58 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -86,8 +86,6 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
 
-	set_balance_irq_affinity(irq, cpumask);
-
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT) {
 		unsigned long flags;
@@ -122,7 +120,6 @@ int irq_select_affinity(unsigned int irq)
 	desc->affinity = mask;
 	desc->chip->set_affinity(irq, mask);
 
-	set_balance_irq_affinity(irq, mask);
 	return 0;
 }
 #endif
-- 
cgit v1.2.3-55-g7522


From a1420f395d7721895c05ba3dedf150294d2c0e4d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:24 -0700
Subject: x86: add irq_cfg for 32bit

it only contains vector ...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 71 ++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 668edf226067..033ad953bee3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -94,6 +94,43 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 static int disable_timer_pin_1 __initdata;
 
+struct irq_cfg {
+	u8 vector;
+};
+
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+	[0] = { .vector = FIRST_DEVICE_VECTOR,  },
+};
+
+static void __init init_work(void *data)
+{
+        struct dyn_array *da = data;
+        struct irq_cfg *cfg;
+        int legacy_count;
+        int i;
+
+        cfg = *da->name;
+
+        legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+
+	BUG_ON(legacy_count > nr_irqs);
+
+        memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+}
+
+static struct irq_cfg *irq_cfgx;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return &irq_cfgx[irq];
+}
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -794,22 +831,6 @@ static inline int IO_APIC_irq_trigger(int irq)
 	return 0;
 }
 
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR;
-static u8 *irq_vector;
-
-static void __init irq_vector_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	u8 *irq_vec;
-
-	irq_vec = *da->name;
-
-	irq_vec[0] = irq_vector_init_first;
-}
-
-DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work);
 
 static int __assign_irq_vector(int irq)
 {
@@ -818,8 +839,8 @@ static int __assign_irq_vector(int irq)
 
 	BUG_ON((unsigned)irq >= nr_irqs);
 
-	if (irq_vector[irq] > 0)
-		return irq_vector[irq];
+	if (irq_cfg(irq)->vector > 0)
+		return irq_cfg(irq)->vector;
 
 	vector = current_vector;
 	offset = current_offset;
@@ -836,7 +857,7 @@ next:
 
 	current_vector = vector;
 	current_offset = offset;
-	irq_vector[irq] = vector;
+	irq_cfg(irq)->vector = vector;
 
 	return vector;
 }
@@ -1598,7 +1619,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
  * operation to prevent an edge-triggered interrupt escaping meanwhile.
  * The idea is from Manfred Spraul.  --macro
  */
-	i = irq_vector[irq];
+	i = irq_cfg(irq)->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
@@ -1615,7 +1636,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	send_IPI_self(irq_vector[irq]);
+	send_IPI_self(irq_cfg(irq)->vector);
 
 	return 1;
 }
@@ -1651,7 +1672,7 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
+		if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2102,7 +2123,7 @@ int create_irq(void)
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_vector[new] != 0)
+		if (irq_cfg(new)->vector != 0)
 			continue;
 		vector = __assign_irq_vector(new);
 		if (likely(vector > 0))
@@ -2125,8 +2146,8 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	clear_bit(irq_vector[irq], used_vectors);
-	irq_vector[irq] = 0;
+	clear_bit(irq_cfg(irq)->vector, used_vectors);
+	irq_cfg(irq)->vector = 0;
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
-- 
cgit v1.2.3-55-g7522


From da51a821314dd0ec7126f2bf9a62117146fbb6b9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:25 -0700
Subject: x86: make 32bit use irq_cfg_alloc, etc

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 180 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 160 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 033ad953bee3..5a83d7f5b147 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -94,41 +94,172 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 static int disable_timer_pin_1 __initdata;
 
+struct irq_cfg;
+
 struct irq_cfg {
+	unsigned int irq;
+	struct irq_cfg *next;
 	u8 vector;
 };
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0] = { .vector = FIRST_DEVICE_VECTOR,  },
+	[0]  = { .irq =  0, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .vector = IRQ15_VECTOR, },
 };
 
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
+
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
 static void __init init_work(void *data)
 {
-        struct dyn_array *da = data;
-        struct irq_cfg *cfg;
-        int legacy_count;
-        int i;
+	struct dyn_array *da = data;
+	struct irq_cfg *cfg;
+	int legacy_count;
+	int i;
+
+	cfg = *da->name;
 
-        cfg = *da->name;
+	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-        legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (i = legacy_count; i < *da->nr; i++)
+		init_one_irq_cfg(&cfg[i]);
 
-	BUG_ON(legacy_count > nr_irqs);
+	for (i = 1; i < *da->nr; i++)
+		cfg[i-1].next = &cfg[i];
 
-        memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+	irq_cfgx_free = &irq_cfgx[legacy_count];
+	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
-static struct irq_cfg *irq_cfgx;
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
+#define for_each_irq_cfg(cfg)           \
+	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	if (irq >= nr_irqs)
-		return NULL;
+	struct irq_cfg *cfg;
+
+	cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
 
-	return &irq_cfgx[irq];
+		cfg = cfg->next;
+	}
+
+	return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	struct irq_cfg *cfg, *cfg_pri;
+	int i;
+	int count = 0;
+
+	cfg_pri = cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		cfg_pri = cfg;
+		cfg = cfg->next;
+		count++;
+	}
+
+	if (!irq_cfgx_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+		if (after_bootmem)
+			cfg = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+		if (!cfg)
+			panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+		phys = __pa(cfg);
+		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+		for (i = 0; i < nr_irq_cfg; i++)
+			init_one_irq_cfg(&cfg[i]);
+
+		for (i = 1; i < nr_irq_cfg; i++)
+			cfg[i-1].next = &cfg[i];
+
+		irq_cfgx_free = cfg;
+	}
+
+	cfg = irq_cfgx_free;
+	irq_cfgx_free = irq_cfgx_free->next;
+	cfg->next = NULL;
+	if (cfg_pri)
+		cfg_pri->next = cfg;
+	else
+		irq_cfgx = cfg;
+	cfg->irq = irq;
+	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_cfg *cfg;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_cfg);
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+		for_each_irq_cfg(cfg) {
+			phys = __pa(cfg);
+			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
+	return cfg;
 }
 
 /*
@@ -254,6 +385,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	irq_cfg_alloc(irq);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -836,11 +968,13 @@ static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
 	int vector, offset;
+	struct irq_cfg *cfg;
 
 	BUG_ON((unsigned)irq >= nr_irqs);
 
-	if (irq_cfg(irq)->vector > 0)
-		return irq_cfg(irq)->vector;
+	cfg = irq_cfg(irq);
+	if (cfg->vector > 0)
+		return cfg->vector;
 
 	vector = current_vector;
 	offset = current_offset;
@@ -857,7 +991,7 @@ next:
 
 	current_vector = vector;
 	current_offset = offset;
-	irq_cfg(irq)->vector = vector;
+	cfg->vector = vector;
 
 	return vector;
 }
@@ -1659,6 +1793,7 @@ static inline void init_IO_APIC_traps(void)
 {
 	int irq;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1671,8 +1806,9 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for (irq = 0; irq < nr_irqs ; irq++) {
-		if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) {
+	for_each_irq_cfg(cfg) {
+		irq = cfg->irq;
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2117,14 +2253,18 @@ int create_irq(void)
 	/* Allocate an unused irq */
 	int irq, new, vector = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg_new;
 
 	irq = -ENOSPC;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = (nr_irqs - 1); new >= 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		if (irq_cfg(new)->vector != 0)
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
 		vector = __assign_irq_vector(new);
 		if (likely(vector > 0))
 			irq = new;
-- 
cgit v1.2.3-55-g7522


From 0f978f4505e96227a89b3c9447552aca983c6b57 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:26 -0700
Subject: x86: make 32bit to use irq_2_pin in irq_cfg

so it is more like 64 bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 152 +++++++++++++++++++++++++++++++++----------
 1 file changed, 117 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 5a83d7f5b147..59d2e8a273e3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -95,10 +95,11 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 static int disable_timer_pin_1 __initdata;
 
 struct irq_cfg;
-
+struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
+	struct irq_pin_list *irq_2_pin;
 	u8 vector;
 };
 
@@ -275,11 +276,66 @@ int pin_map_size;
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-	int apic, pin, next;
-} *irq_2_pin;
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = *da->name;
+
+	for (i = 1; i < *da->nr; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = irq_2_pin_ptr;
+
+	if (pin) {
+		irq_2_pin_ptr = pin->next;
+		pin->next = NULL;
+		return pin;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+	if (after_bootmem)
+		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+				 GFP_ATOMIC);
+	else
+		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+				nr_irq_2_pin, PAGE_SIZE, 0);
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");
 
-DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL);
+	for (i = 1; i < nr_irq_2_pin; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = pin->next;
+	pin->next = NULL;
+
+	return pin;
+}
 
 struct io_apic {
 	unsigned int index;
@@ -383,20 +439,34 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	/* first time to refer irq_cfg, so with new */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+		return;
+	}
 
-	irq_cfg_alloc(irq);
-	while (entry->next)
-		entry = irq_2_pin + entry->next;
+	while (entry->next) {
+		/* not again, please */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
 
-	if (entry->pin != -1) {
-		entry->next = first_free_entry;
-		entry = irq_2_pin + entry->next;
-		if (++first_free_entry >= pin_map_size)
-			panic("io_apic.c: whoops");
+		entry = entry->next;
 	}
+
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
+	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
@@ -406,35 +476,45 @@ static void __init replace_pin_at_irq(unsigned int irq,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
 
-	while (1) {
+	while (entry) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-		}
-		if (!entry->next)
+			replaced = 1;
+			/* every one is different, right? */
 			break;
-		entry = irq_2_pin + entry->next;
+		}
+		entry = entry->next;
 	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
 }
 
 static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
 {
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
 	unsigned int pin, reg;
 
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
 	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		reg &= ~disable;
 		reg |= enable;
 		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 }
 
@@ -509,13 +589,18 @@ static void clear_IO_APIC(void)
 #ifdef CONFIG_SMP
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 {
+	struct irq_cfg *cfg;
 	unsigned long flags;
 	int pin;
-	struct irq_pin_list *entry = irq_2_pin + irq;
+	struct irq_pin_list *entry;
 	unsigned int apicid_value;
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+
 	cpus_and(tmp, cpumask, cpu_online_map);
 	if (cpus_empty(tmp))
 		tmp = TARGET_CPUS;
@@ -527,13 +612,13 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	apicid_value = apicid_value << 24;
 	spin_lock_irqsave(&ioapic_lock, flags);
 	for (;;) {
-		pin = entry->pin;
-		if (pin == -1)
+		if (!entry)
 			break;
+		pin = entry->pin;
 		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
 		if (!entry->next)
 			break;
-		entry = irq_2_pin + entry->next;
+		entry = entry->next;
 	}
 	desc = irq_to_desc(irq);
 	desc->affinity = cpumask;
@@ -1152,6 +1237,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1240,16 +1326,16 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_pin_list *entry = irq_2_pin + i;
-		if (entry->pin < 0)
+	for_each_irq_cfg(cfg) {
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", i);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
 				break;
-			entry = irq_2_pin + entry->next;
+			entry = entry->next;
 		}
 		printk("\n");
 	}
@@ -1420,10 +1506,6 @@ static void __init enable_IO_APIC(void)
 	int i, apic;
 	unsigned long flags;
 
-	for (i = 0; i < pin_map_size; i++) {
-		irq_2_pin[i].pin = -1;
-		irq_2_pin[i].next = 0;
-	}
 	if (!pirqs_enabled)
 		for (i = 0; i < MAX_PIRQS; i++)
 			pirq_entries[i] = -1;
-- 
cgit v1.2.3-55-g7522


From 199751d715bba5b469ea22adadc68a4166bfa4f5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:27 -0700
Subject: x86: make 32 bit to use sparse_irq

but actually irq still needs to be less than NR_IRQS, because
interrupt[NR_IRQS] in entry.S.

need to enable per_cpu vector...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig             |  2 +-
 arch/x86/kernel/io_apic_32.c | 63 ++++++++++++++++++++++++++++++--------------
 arch/x86/kernel/irq_32.c     | 37 +++++++++++++++++++-------
 arch/x86/kernel/irqinit_32.c |  7 +++++
 4 files changed, 79 insertions(+), 30 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3e0eaaa1a339..b157e94637bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,7 +34,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
-	select HAVE_SPARSE_IRQ if X86_64
+	select HAVE_SPARSE_IRQ
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 59d2e8a273e3..66c0a91362a7 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -595,7 +595,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	struct irq_pin_list *entry;
 	unsigned int apicid_value;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 
 	cfg = irq_cfg(irq);
@@ -620,8 +619,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = entry->next;
 	}
-	desc = irq_to_desc(irq);
-	desc->affinity = cpumask;
+	irq_to_desc(irq)->affinity = cpumask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -1055,8 +1053,6 @@ static int __assign_irq_vector(int irq)
 	int vector, offset;
 	struct irq_cfg *cfg;
 
-	BUG_ON((unsigned)irq >= nr_irqs);
-
 	cfg = irq_cfg(irq);
 	if (cfg->vector > 0)
 		return cfg->vector;
@@ -1103,7 +1099,12 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	desc = irq_to_desc(irq);
+	/* first time to use this irq_desc */
+	if (irq < 16)
+		desc = irq_to_desc(irq);
+	else
+		desc = irq_to_desc_alloc(irq);
+
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL) {
 		desc->status |= IRQ_LEVEL;
@@ -2330,16 +2331,19 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	int irq, new, vector = 0;
+	unsigned int irq, new, vector = 0;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-	irq = -ENOSPC;
+	/* only can use bus/dev/fn.. when per_cpu vector is used */
+	irq_want = nr_irqs - 1;
+
+	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new >= 0; new--) {
+	for (new = (nr_irqs - 1); new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2354,13 +2358,18 @@ int create_irq(void)
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq >= 0) {
+	if (irq > 0) {
 		set_intr_gate(vector, interrupt[irq]);
 		dynamic_irq_init(irq);
 	}
 	return irq;
 }
 
+int create_irq(void)
+{
+	return create_irq_nr(nr_irqs - 1);
+}
+
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -2415,7 +2424,6 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 	int vector;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2435,8 +2443,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	irq_to_desc(irq)->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2455,13 +2462,31 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
 	struct msi_msg msg;
 	int irq, ret;
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
+
+	unsigned int irq_want;
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+
+	if (irq == 0)
+		return -1;
 
 	ret = msi_compose_msg(dev, irq, &msg);
 	if (ret < 0) {
@@ -2510,7 +2535,6 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2521,8 +2545,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(mask);
 
 	target_ht_irq(irq, dest);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	irq_to_desc(irq)->affinity = mask;
 }
 #endif
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 576c5df6cad8..0a57e39159a8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -224,9 +224,10 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
 	int overflow, irq = ~regs->orig_ax;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 
-	if (unlikely((unsigned)irq >= nr_irqs)) {
+	desc = irq_to_desc(irq);
+	if (unlikely(!desc)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
 					__func__, irq);
 		BUG();
@@ -263,6 +264,24 @@ int show_interrupts(struct seq_file *p, void *v)
 	int i = *(loff_t *) v, j;
 	struct irqaction * action;
 	unsigned long flags;
+	unsigned int entries;
+	struct irq_desc *desc;
+	int tail = 0;
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+	desc = (struct irq_desc *)v;
+	entries = -1U;
+	i = desc->irq;
+	if (!desc->next)
+		tail = 1;
+#else
+	entries = nr_irqs - 1;
+	i = *(loff_t *) v;
+	if (i == nr_irqs)
+		tail = 1;
+	else
+		desc = irq_to_desc(i);
+#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
@@ -271,9 +290,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i < nr_irqs) {
+	if (i <= entries) {
 		unsigned any_count = 0;
-		struct irq_desc *desc = irq_to_desc(i);
 
 		spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
@@ -285,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%3d: ",i);
+		seq_printf(p, "%#x: ",i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
@@ -304,7 +322,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == nr_irqs) {
+	}
+
+	if (tail) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", nmi_count(j));
@@ -396,15 +416,14 @@ void fixup_irqs(cpumask_t map)
 {
 	unsigned int irq;
 	static int warned;
+	struct irq_desc *desc;
 
-	for (irq = 0; irq < nr_irqs; irq++) {
+	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
-		struct irq_desc *desc;
 
 		if (irq == 2)
 			continue;
 
-		desc = irq_to_desc(irq);
 		cpus_and(mask, desc->affinity, map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 65c1c9507707..ded09ac2642e 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
+		/* first time call this irq_desc */
+		struct irq_desc *desc = irq_to_desc_alloc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
 		set_irq_chip_and_handler_name(i, &i8259A_chip,
 					      handle_level_irq, "XT");
 	}
-- 
cgit v1.2.3-55-g7522


From 497c9a195db918d3f035e8cb3021e5d4d035516e Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:28 -0700
Subject: x86: make 32bit support per_cpu vector

so we can merge io_apic_32.c and io_apic_64.c

v2: Use cpu_online_map as target cpus for bigsmp, just like 64-bit is doing.

Also remove some unused TARGET_CPUS macro.

v3: need to check if desc is null in smp_irq_move_cleanup

also migration needs to reset vector too, so copy __target_IO_APIC_irq
from 64bit.

(the duplication will go away once the two files are unified.)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S                |   2 +-
 arch/x86/kernel/io_apic_32.c              | 719 +++++++++++++++++++-----------
 arch/x86/kernel/irq_32.c                  |  18 +-
 arch/x86/kernel/irqinit_32.c              |  41 +-
 arch/x86/lguest/boot.c                    |   2 +-
 arch/x86/mach-generic/bigsmp.c            |   4 +
 arch/x86/mach-generic/es7000.c            |  14 +
 arch/x86/mach-generic/numaq.c             |  14 +
 arch/x86/mach-generic/summit.c            |  14 +
 include/asm-x86/bigsmp/apic.h             |  15 +-
 include/asm-x86/es7000/apic.h             |   3 +-
 include/asm-x86/genapic_32.h              |   2 +
 include/asm-x86/hw_irq.h                  |   6 +-
 include/asm-x86/irq_vectors.h             |  15 +-
 include/asm-x86/mach-default/entry_arch.h |   1 +
 include/asm-x86/mach-default/mach_apic.h  |  15 +-
 include/asm-x86/mach-generic/mach_apic.h  |   1 +
 include/asm-x86/numaq/apic.h              |   2 -
 include/asm-x86/summit/apic.h             |   1 -
 19 files changed, 577 insertions(+), 312 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..4d82171d0f9c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
 	ALIGN
  .if vector
 	CFI_ADJUST_CFA_OFFSET -4
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 66c0a91362a7..ea33d3c74970 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -48,6 +48,7 @@
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 
@@ -60,7 +61,7 @@ atomic_t irq_mis_count;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 int timer_through_8259 __initdata;
 
@@ -100,28 +101,32 @@ struct irq_cfg {
 	unsigned int irq;
 	struct irq_cfg *next;
 	struct irq_pin_list *irq_2_pin;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
 	u8 vector;
+	u8 move_in_progress : 1;
 };
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .irq =  0, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .vector = IRQ15_VECTOR, },
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
 static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
@@ -263,6 +268,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -432,6 +438,65 @@ static void ioapic_mask_entry(int apic, int pin)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+#ifdef CONFIG_SMP
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	int apic, pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin *2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cfg = irq_cfg(irq);
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+
+	dest = cpu_mask_to_apicid(tmp);
+	/*
+	 * Only the high 8 bits are valid.
+	 */
+	dest = SET_APIC_LOGICAL_ID(dest);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	irq_to_desc(irq)->affinity = mask;
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#endif /* CONFIG_SMP */
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -586,45 +651,6 @@ static void clear_IO_APIC(void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int pin;
-	struct irq_pin_list *entry;
-	unsigned int apicid_value;
-	cpumask_t tmp;
-
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-
-	cpus_and(tmp, cpumask, cpu_online_map);
-	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
-
-	cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
-	apicid_value = cpu_mask_to_apicid(cpumask);
-	/* Prepare to do the io_apic_write */
-	apicid_value = apicid_value << 24;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-	irq_to_desc(irq)->affinity = cpumask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#endif /* CONFIG_SMP */
-
 #ifndef CONFIG_SMP
 void send_IPI_self(int vector)
 {
@@ -789,32 +815,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-			set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1046,47 +1046,138 @@ static inline int IO_APIC_irq_trigger(int irq)
 	return 0;
 }
 
+void lock_vector_lock(void)
+{
+	/* Used to the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	spin_lock(&vector_lock);
+}
 
-static int __assign_irq_vector(int irq)
+void unlock_vector_lock(void)
 {
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
-	int vector, offset;
-	struct irq_cfg *cfg;
+	spin_unlock(&vector_lock);
+}
 
-	cfg = irq_cfg(irq);
-	if (cfg->vector > 0)
-		return cfg->vector;
+static int __assign_irq_vector(int irq, cpumask_t mask)
+{
+        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+        unsigned int old_vector;
+        int cpu;
+        struct irq_cfg *cfg;
 
-	vector = current_vector;
-	offset = current_offset;
-next:
-	vector += 8;
-	if (vector >= first_system_vector) {
-		offset = (offset + 1) % 8;
-		vector = FIRST_DEVICE_VECTOR + offset;
-	}
-	if (vector == current_vector)
-		return -ENOSPC;
-	if (test_and_set_bit(vector, used_vectors))
-		goto next;
+        cfg = irq_cfg(irq);
 
-	current_vector = vector;
-	current_offset = offset;
-	cfg->vector = vector;
+        /* Only try and allocate irqs on cpus that are present */
+        cpus_and(mask, mask, cpu_online_map);
 
-	return vector;
-}
+        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+                return -EBUSY;
 
-static int assign_irq_vector(int irq)
-{
+        old_vector = cfg->vector;
+        if (old_vector) {
+                cpumask_t tmp;
+                cpus_and(tmp, cfg->domain, mask);
+                if (!cpus_empty(tmp))
+                        return 0;
+        }
+
+        for_each_cpu_mask_nr(cpu, mask) {
+                cpumask_t domain, new_mask;
+                int new_cpu;
+                int vector, offset;
+
+                domain = vector_allocation_domain(cpu);
+                cpus_and(new_mask, domain, cpu_online_map);
+
+                vector = current_vector;
+                offset = current_offset;
+next:
+                vector += 8;
+                if (vector >= first_system_vector) {
+                        /* If we run out of vectors on large boxen, must share them. */
+                        offset = (offset + 1) % 8;
+                        vector = FIRST_DEVICE_VECTOR + offset;
+                }
+                if (unlikely(current_vector == vector))
+                        continue;
+		if (vector == SYSCALL_VECTOR)
+                        goto next;
+
+                for_each_cpu_mask_nr(new_cpu, new_mask)
+                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+                                goto next;
+                /* Found one! */
+                current_vector = vector;
+                current_offset = offset;
+                if (old_vector) {
+                        cfg->move_in_progress = 1;
+                        cfg->old_domain = cfg->domain;
+                }
+                for_each_cpu_mask_nr(new_cpu, new_mask)
+                        per_cpu(vector_irq, new_cpu)[vector] = irq;
+                cfg->vector = vector;
+                cfg->domain = domain;
+                return 0;
+        }
+        return -ENOSPC;
+}
+
+static int assign_irq_vector(int irq, cpumask_t mask)
+{
+	int err;
 	unsigned long flags;
-	int vector;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	vector = __assign_irq_vector(irq);
+	err = __assign_irq_vector(irq, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	return vector;
+	return err;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	struct irq_cfg *cfg;
+	cpumask_t mask;
+	int cpu, vector;
+
+	cfg = irq_cfg(irq);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	cpus_and(mask, cfg->domain, cpu_online_map);
+	for_each_cpu_mask_nr(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	cfg->vector = 0;
+	cpus_clear(cfg->domain);
+}
+
+void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	/* This function must be called with vector_lock held */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/* Mark the inuse vectors */
+	for_each_irq_cfg(cfg) {
+		if (!cpu_isset(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		irq = cfg->irq;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq < 0)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = -1;
+        }
 }
 
 static struct irq_chip ioapic_chip;
@@ -1095,7 +1186,7 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
@@ -1115,79 +1206,109 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
 	}
-	set_intr_gate(vector, interrupt[irq]);
 }
 
-static void __init setup_IO_APIC_irqs(void)
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
 {
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+	entry->delivery_mode = INT_DELIVERY_MODE;
+	entry->dest_mode = INT_DEST_MODE;
+	entry->dest.logical.logical_dest = destination;
+
+	entry->mask = 0;                                /* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+
+	return 0;
+}
+
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+                              int trigger, int polarity)
+{
+	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
+	cpumask_t mask;
+
+	if (!IO_APIC_IRQ(irq))
+		return;
+
+	cfg = irq_cfg(irq);
+
+	mask = TARGET_CPUS;
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(mask, cfg->domain, mask);
+
+	apic_printk(APIC_VERBOSE,KERN_DEBUG
+		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+		    "IRQ %d Mode:%i Active:%i)\n",
+		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+		    irq, trigger, polarity);
+
+
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
+
+	ioapic_register_intr(irq, trigger);
+	if (irq < 16)
+		disable_8259A_irq(irq);
+
+	ioapic_write_entry(apic, pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	int apic, pin, idx, irq, first_notcon = 1;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
 	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry, 0, sizeof(entry));
-
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0;				/* enable IRQ */
-		entry.dest.logical.logical_dest =
-					cpu_mask_to_apicid(TARGET_CPUS);
-
-		idx = find_irq_entry(apic, pin, mp_INT);
+		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
 			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						" IO-APIC (apicid-pin) %d-%d",
-						mp_ioapics[apic].mp_apicid,
-						pin);
+				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
 				first_notcon = 0;
 			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d",
-					mp_ioapics[apic].mp_apicid, pin);
+				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
 			continue;
 		}
-
 		if (!first_notcon) {
 			apic_printk(APIC_VERBOSE, " not connected.\n");
 			first_notcon = 1;
 		}
 
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-		}
-
 		irq = pin_2_irq(idx, apic, pin);
-		/*
-		 * skip adding the timer int on secondary nodes, which causes
-		 * a small but painful rift in the time-space continuum
-		 */
-		if (multi_timer_check(apic, irq))
-			continue;
-		else
-			add_pin_to_irq(irq, apic, pin);
 
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
+                if (multi_timer_check(apic, irq))
+                        continue;
 
-		if (IO_APIC_IRQ(irq)) {
-			vector = assign_irq_vector(irq);
-			entry.vector = vector;
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		add_pin_to_irq(irq, apic, pin);
 
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
+		setup_IO_APIC_irq(apic, pin, irq,
+				  irq_trigger(idx), irq_polarity(idx));
 	}
 	}
 
@@ -1221,7 +1342,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we may have a 8259A-master in AEOI mode ...
 	 */
-	ioapic_register_intr(0, vector, IOAPIC_EDGE);
+	ioapic_register_intr(0, IOAPIC_EDGE);
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1805,8 +1926,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+static void irq_complete_move(unsigned int irq);
 static void ack_ioapic_irq(unsigned int irq)
 {
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -1816,6 +1939,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq)
 	unsigned long v;
 	int i;
 
+	irq_complete_move(irq);
 	move_native_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
@@ -1858,6 +1982,64 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	return 1;
 }
 
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_ax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
 	.startup 	= startup_ioapic_irq,
@@ -1940,7 +2122,7 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq, int vector)
+static void lapic_register_intr(int irq)
 {
 	struct irq_desc *desc;
 
@@ -1948,7 +2130,6 @@ static void lapic_register_intr(int irq, int vector)
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
-	set_intr_gate(vector, interrupt[irq]);
 }
 
 static void __init setup_nmi(void)
@@ -2036,9 +2217,9 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
+	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	int no_pin1 = 0;
-	int vector;
 	unsigned int ver;
 	unsigned long flags;
 
@@ -2051,8 +2232,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	vector = assign_irq_vector(0);
-	set_intr_gate(vector, interrupt[0]);
+	assign_irq_vector(0, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2074,7 +2254,7 @@ static inline void __init check_timer(void)
 
 	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
 		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    vector, apic1, pin1, apic2, pin2);
+		    cfg->vector, apic1, pin1, apic2, pin2);
 
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
@@ -2098,7 +2278,7 @@ static inline void __init check_timer(void)
 		 */
 		if (no_pin1) {
 			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, vector);
+			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
 		if (timer_irq_works()) {
@@ -2123,7 +2303,7 @@ static inline void __init check_timer(void)
 		 * legacy devices should be connected to IO APIC #0
 		 */
 		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, vector);
+		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		unmask_IO_APIC_irq(0);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -2154,8 +2334,8 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0, vector);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
+	lapic_register_intr(0);
+	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
 	if (timer_irq_works()) {
@@ -2163,7 +2343,7 @@ static inline void __init check_timer(void)
 		goto out;
 	}
 	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
 
 	apic_printk(APIC_QUIET, KERN_INFO
@@ -2207,12 +2387,6 @@ out:
 
 void __init setup_IO_APIC(void)
 {
-	int i;
-
-	/* Reserve all the system vectors. */
-	for (i = first_system_vector; i < NR_VECTORS; i++)
-		set_bit(i, used_vectors);
-
 	enable_IO_APIC();
 
 	io_apic_irqs = ~PIC_IRQS;
@@ -2334,12 +2508,14 @@ device_initcall(ioapic_init_sysfs);
 unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	unsigned int irq, new, vector = 0;
+	unsigned int irq, new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
+#ifndef CONFIG_HAVE_SPARSE_IRQ
 	/* only can use bus/dev/fn.. when per_cpu vector is used */
 	irq_want = nr_irqs - 1;
+#endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
@@ -2351,15 +2527,13 @@ unsigned int create_irq_nr(unsigned int irq_want)
 			continue;
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
-		vector = __assign_irq_vector(new);
-		if (likely(vector > 0))
+		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	if (irq > 0) {
-		set_intr_gate(vector, interrupt[irq]);
 		dynamic_irq_init(irq);
 	}
 	return irq;
@@ -2377,8 +2551,7 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	clear_bit(irq_cfg(irq)->vector, used_vectors);
-	irq_cfg(irq)->vector = 0;
+	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -2388,57 +2561,65 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-	int vector;
+	struct irq_cfg *cfg;
+	int err;
 	unsigned dest;
+	cpumask_t tmp;
 
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
-		dest = cpu_mask_to_apicid(TARGET_CPUS);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if (err)
+		return err;
 
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(vector);
-	}
-	return vector;
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((INT_DEST_MODE == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL:
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((INT_DELIVERY_MODE != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU:
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((INT_DELIVERY_MODE != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED:
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+
+	return err;
 }
 
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	int vector;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
+		return;
 
-	vector = assign_irq_vector(irq);
-	if (vector < 0)
+	if (assign_irq_vector(irq, mask))
 		return;
 
-	dest = cpu_mask_to_apicid(mask);
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(vector);
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
@@ -2517,15 +2698,15 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_SMP
 
-static void target_ht_irq(unsigned int irq, unsigned int dest)
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	struct ht_irq_msg msg;
 	fetch_ht_irq_msg(irq, &msg);
 
-	msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
 	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
 
-	msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
 	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
 
 	write_ht_irq_msg(irq, &msg);
@@ -2533,18 +2714,22 @@ static void target_ht_irq(unsigned int irq, unsigned int dest)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
-		tmp = TARGET_CPUS;
+		return;
 
-	cpus_and(mask, tmp, CPU_MASK_ALL);
+	if (assign_irq_vector(irq, mask))
+		return;
 
-	dest = cpu_mask_to_apicid(mask);
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
 
-	target_ht_irq(irq, dest);
+	target_ht_irq(irq, dest, cfg->vector);
 	irq_to_desc(irq)->affinity = mask;
 }
 #endif
@@ -2562,16 +2747,18 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-	int vector;
+	struct irq_cfg *cfg;
+	int err;
+	cpumask_t tmp;
 
-	vector = assign_irq_vector(irq);
-	if (vector >= 0) {
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if ( !err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
-		cpumask_t tmp;
 
-		cpus_clear(tmp);
-		cpu_set(vector >> 8, tmp);
+		cfg = irq_cfg(irq);
+		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
@@ -2579,7 +2766,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		msg.address_lo =
 			HT_IRQ_LOW_BASE |
 			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(vector) |
+			HT_IRQ_LOW_VECTOR(cfg->vector) |
 			((INT_DEST_MODE == 0) ?
 				HT_IRQ_LOW_DM_PHYSICAL :
 				HT_IRQ_LOW_DM_LOGICAL) |
@@ -2594,7 +2781,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
 	}
-	return vector;
+	return err;
 }
 #endif /* CONFIG_HT_IRQ */
 
@@ -2705,50 +2892,21 @@ int __init io_apic_get_redir_entries(int ioapic)
 }
 
 
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
 {
-	struct IO_APIC_route_entry entry;
-
 	if (!IO_APIC_IRQ(irq)) {
 		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
 
-	/*
-	 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
-	 * Note that we mask (disable) IRQs now -- these get enabled when the
-	 * corresponding device driver registers for this IRQ.
-	 */
-
-	memset(&entry, 0, sizeof(entry));
-
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.trigger = edge_level;
-	entry.polarity = active_high_low;
-	entry.mask  = 1;
-
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
 	if (irq >= 16)
 		add_pin_to_irq(irq, ioapic, pin);
 
-	entry.vector = assign_irq_vector(irq);
-
-	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
-		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-		mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
-		edge_level, active_high_low);
-
-	ioapic_register_intr(irq, entry.vector, edge_level);
-
-	if (!ioapic && (irq < 16))
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(ioapic, pin, entry);
+	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
 
 	return 0;
 }
@@ -2774,6 +2932,47 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 
 #endif /* CONFIG_ACPI */
 
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+			if (irq_entry == -1)
+				continue;
+			irq = pin_2_irq(irq_entry, ioapic, pin);
+
+			/* setup_IO_APIC_irqs could fail to get vector for some device
+			 * when you have too many devices, because at that time only boot
+			 * cpu is online.
+			 */
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
+				setup_IO_APIC_irq(ioapic, pin, irq,
+						  irq_trigger(irq_entry),
+						  irq_polarity(irq_entry));
+			else {
+				desc = irq_to_desc(irq);
+				set_ioapic_affinity_irq(irq, TARGET_CPUS);
+			}
+		}
+
+	}
+}
+#endif
+
 static int __init parse_disable_timer_pin_1(char *arg)
 {
 	disable_timer_pin_1 = 1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 0a57e39159a8..b51ffdcfa31a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -223,21 +223,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
 	/* high bit used in ret_from_ code */
-	int overflow, irq = ~regs->orig_ax;
+	int overflow;
+	unsigned vector = ~regs->orig_ax;
 	struct irq_desc *desc;
+	unsigned irq;
 
-	desc = irq_to_desc(irq);
-	if (unlikely(!desc)) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-					__func__, irq);
-		BUG();
-	}
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
+	irq = __get_cpu_var(vector_irq)[vector];
 
 	overflow = check_stack_overflow();
 
+	desc = irq_to_desc(irq);
+	if (unlikely(!desc)) {
+		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n",
+					__func__, irq, vector);
+		BUG();
+	}
+
 	if (!execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index ded09ac2642e..9092103a18eb 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -90,6 +90,27 @@ static struct irqaction irq2 = {
 	.name = "cascade",
 };
 
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+	[0 ... IRQ0_VECTOR - 1] = -1,
+	[IRQ0_VECTOR] = 0,
+	[IRQ1_VECTOR] = 1,
+	[IRQ2_VECTOR] = 2,
+	[IRQ3_VECTOR] = 3,
+	[IRQ4_VECTOR] = 4,
+	[IRQ5_VECTOR] = 5,
+	[IRQ6_VECTOR] = 6,
+	[IRQ7_VECTOR] = 7,
+	[IRQ8_VECTOR] = 8,
+	[IRQ9_VECTOR] = 9,
+	[IRQ10_VECTOR] = 10,
+	[IRQ11_VECTOR] = 11,
+	[IRQ12_VECTOR] = 12,
+	[IRQ13_VECTOR] = 13,
+	[IRQ14_VECTOR] = 14,
+	[IRQ15_VECTOR] = 15,
+	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -105,22 +126,14 @@ void __init native_init_IRQ(void)
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (i >= nr_irqs)
-			break;
+	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (!test_bit(vector, used_vectors))
-			set_intr_gate(vector, interrupt[i]);
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i]);
 	}
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
-	/*
-	 * IRQ0 must be given a fixed assignment and initialized,
-	 * because it's used before the IO-APIC is set up.
-	 */
-	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -135,6 +148,9 @@ void __init native_init_IRQ(void)
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -168,3 +184,4 @@ void __init native_init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 }
+
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 65f0b8a47bed..48ee4f9435f4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
 	for (i = 0; i < LGUEST_IRQS; i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
 		if (vector != SYSCALL_VECTOR) {
-			set_intr_gate(vector, interrupt[i]);
+			set_intr_gate(vector, interrupt[vector]);
 			set_irq_chip_and_handler_name(i, &lguest_irq_controller,
 						      handle_level_irq,
 						      "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index df37fc9d6a26..3c3b471ea496 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	 { }
 };
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+        return cpumask_of_cpu(cpu);
+}
 
 static int probe_bigsmp(void)
 {
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 6513d41ea21e..28459cab3ddb 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8cf58394975e..71a309b122e6 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6ad6b67a723d..6272b5e69da6 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -23,4 +23,18 @@ static int probe_summit(void)
 	return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+	return domain;
+}
+
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/include/asm-x86/bigsmp/apic.h b/include/asm-x86/bigsmp/apic.h
index 0a9cd7c5ca0c..1d9543b9d358 100644
--- a/include/asm-x86/bigsmp/apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -9,22 +9,17 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-/* Round robin the irqs amoung the online cpus */
 static inline cpumask_t target_cpus(void)
 {
-	static unsigned long cpu = NR_CPUS;
-	do {
-		if (cpu >= NR_CPUS)
-			cpu = first_cpu(cpu_online_map);
-		else
-			cpu = next_cpu(cpu, cpu_online_map);
-	} while (cpu >= NR_CPUS);
-	return cpumask_of_cpu(cpu);
+#ifdef CONFIG_SMP
+        return cpu_online_map;
+#else
+        return cpumask_of_cpu(0);
+#endif
 }
 
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0
-#define TARGET_CPUS		(target_cpus())
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target proc */
diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h
index bd2c44d1f7ac..750afada5fbf 100644
--- a/include/asm-x86/es7000/apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -17,7 +17,6 @@ static inline cpumask_t target_cpus(void)
 	return cpumask_of_cpu(smp_processor_id());
 #endif
 }
-#define TARGET_CPUS	(target_cpus())
 
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 #define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
@@ -81,7 +80,7 @@ static inline void setup_apic_routing(void)
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
 	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 34280f027664..6fe4f81bfcf9 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -57,6 +57,7 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	cpumask_t (*vector_allocation_domain)(int cpu);
 
 #ifdef CONFIG_SMP
 	/* ipi */
@@ -104,6 +105,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(vector_allocation_domain)			\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 50f6e0316b50..51c787d17cbc 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -116,12 +116,12 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 
 #ifdef CONFIG_X86_32
 extern void (*const interrupt[NR_IRQS])(void);
-#else
+#endif
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
 extern void unlock_vector_lock(void);
 extern void __setup_vector_irq(int cpu);
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index cb09802ce651..a8d065d85f57 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -19,19 +19,14 @@
 
 /*
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration on 64 bit.
+ * cleanup after irq migration.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#ifdef CONFIG_X86_32
-#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR)
-#else
 #define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
-#endif
 #define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
@@ -96,11 +91,7 @@
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
-#ifdef CONFIG_X86_32
-# define FIRST_DEVICE_VECTOR	0x31
-#else
-# define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-#endif
+#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
 
 #define NR_VECTORS		256
 
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index 9283b60a1dd2..6b1add8e31dd 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 #endif
 
 /*
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 2a330a41b3dd..3c66f2cdaec1 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -85,6 +85,20 @@ static inline int apicid_to_node(int logical_apicid)
 	return 0;
 #endif
 }
+
+static inline cpumask_t vector_allocation_domain(int cpu)
+{
+        /* Careful. Some cpus do not strictly honor the set of cpus
+         * specified in the interrupt destination when using lowest
+         * priority interrupt delivery mode.
+         *
+         * In particular there was a hyperthreading cpu observed to
+         * deliver interrupts to the wrong hyperthread when only one
+         * hyperthread was specified in the interrupt desitination.
+         */
+        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+        return domain;
+}
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -138,6 +152,5 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 static inline void enable_apic_mode(void)
 {
 }
-
 #endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 5d010c6881dd..5085b52da301 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
 
diff --git a/include/asm-x86/numaq/apic.h b/include/asm-x86/numaq/apic.h
index a8344ba6ea15..0bf2a06b7a4e 100644
--- a/include/asm-x86/numaq/apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -12,8 +12,6 @@ static inline cpumask_t target_cpus(void)
 	return CPU_MASK_ALL;
 }
 
-#define TARGET_CPUS (target_cpus())
-
 #define NO_BALANCE_IRQ (1)
 #define esr_disable (1)
 
diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h
index c5b2e4b10358..0f68037b8f24 100644
--- a/include/asm-x86/summit/apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -22,7 +22,6 @@ static inline cpumask_t target_cpus(void)
 	 */
 	return cpumask_of_cpu(0);
 }
-#define TARGET_CPUS	(target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
-- 
cgit v1.2.3-55-g7522


From 7a959cff725872ce9c3a534f10724d7bb2cb3c4a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:32 -0700
Subject: x86: add debug info for 32bit sparse_irq

so could figure out bugs where we get an interrupt, but vector_irq is
not initialized yet.

Signed-off-by: Yinghai Lu  <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 8 ++++++--
 arch/x86/kernel/io_apic_64.c | 2 ++
 arch/x86/kernel/irq_32.c     | 4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index ea33d3c74970..3001924bdd36 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1114,8 +1114,12 @@ next:
                         cfg->move_in_progress = 1;
                         cfg->old_domain = cfg->domain;
                 }
-                for_each_cpu_mask_nr(new_cpu, new_mask)
-                        per_cpu(vector_irq, new_cpu)[vector] = irq;
+		printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector);
+		for_each_cpu_mask_nr(new_cpu, new_mask) {
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+			printk(KERN_CONT " %d ", new_cpu);
+		}
+		printk(KERN_CONT "\n");
                 cfg->vector = vector;
                 cfg->domain = domain;
                 return 0;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b0d4abc55a11..30d2e3811313 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1394,6 +1394,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
 	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b51ffdcfa31a..cc929f2f84f1 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -237,8 +237,8 @@ unsigned int do_IRQ(struct pt_regs *regs)
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc)) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n",
-					__func__, irq, vector);
+		printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
+					__func__, irq, vector, smp_processor_id());
 		BUG();
 	}
 
-- 
cgit v1.2.3-55-g7522


From d83e94acd95789829804fd9e442bd18975f4dc89 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:33 -0700
Subject: x86, io-apic: remove union about dest for log/phy

let user decide the meaning of the bits.

This unifies the 32-bit and 64-bit io-apic code a bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 18 +++++++-----------
 include/asm-x86/io_apic.h    | 16 ----------------
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3001924bdd36..353e586822af 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1224,7 +1224,7 @@ static int setup_ioapic_entry(int apic, int irq,
 
 	entry->delivery_mode = INT_DELIVERY_MODE;
 	entry->dest_mode = INT_DEST_MODE;
-	entry->dest.logical.logical_dest = destination;
+	entry->dest = destination;
 
 	entry->mask = 0;                                /* enable IRQ */
 	entry->trigger = trigger;
@@ -1336,7 +1336,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 */
 	entry.dest_mode = INT_DEST_MODE;
 	entry.mask = 1;					/* mask IRQ now */
-	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
 	entry.delivery_mode = INT_DELIVERY_MODE;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -1425,19 +1425,15 @@ __apicdebuginit(void) print_IO_APIC(void)
 
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
-	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
-			  " Stat Dest Deli Vect:   \n");
+	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+			  " Stat Dmod Deli Vect:   \n");
 
 	for (i = 0; i <= reg_01.bits.entries; i++) {
 		struct IO_APIC_route_entry entry;
 
 		entry = ioapic_read_entry(apic, i);
 
-		printk(KERN_DEBUG " %02x %03X %02X  ",
-			i,
-			entry.dest.logical.logical_dest,
-			entry.dest.physical.physical_dest
-		);
+		printk(KERN_DEBUG " %02x %02X  ", i, entry.dest);
 
 		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 			entry.mask,
@@ -1717,7 +1713,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest = read_apic_id();
+		entry.dest	      = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -2185,7 +2181,7 @@ static inline void __init unlock_ExtINT_logic(void)
 
 	entry1.dest_mode = 0;			/* physical delivery */
 	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest.physical.physical_dest = hard_smp_processor_id();
+	entry1.dest = hard_smp_processor_id();
 	entry1.delivery_mode = dest_ExtINT;
 	entry1.polarity = entry0.polarity;
 	entry1.trigger = 0;
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ec68a50cf10..ce818292d2c7 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -87,24 +87,8 @@ struct IO_APIC_route_entry {
 		mask		:  1,	/* 0: enabled, 1: disabled */
 		__reserved_2	: 15;
 
-#ifdef CONFIG_X86_32
-	union {
-		struct {
-			__u32	__reserved_1	: 24,
-				physical_dest	:  4,
-				__reserved_2	:  4;
-		} physical;
-
-		struct {
-			__u32	__reserved_1	: 24,
-				logical_dest	:  8;
-		} logical;
-	} dest;
-#else
 	__u32	__reserved_3	: 24,
 		dest		:  8;
-#endif
-
 } __attribute__ ((packed));
 
 struct IR_IO_APIC_route_entry {
-- 
cgit v1.2.3-55-g7522


From 1d02519242c23450b043e5e8a9e3cb84a8666fe3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:34 -0700
Subject: x86: ordering functions in io_apic_32.c

prepare for unification:

try to make functions be of the same order to io_apic_64.c.

v2: add calling setup_msi_irq back to arch_setup_msi_irq

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 178 +++++++++++++++++++++++--------------------
 1 file changed, 94 insertions(+), 84 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 353e586822af..9531ef33362b 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1029,23 +1029,6 @@ static int pin_2_irq(int idx, int apic, int pin)
 	return irq;
 }
 
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	int apic, idx, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
-	}
-	/*
-	 * nonexistent IRQs are edge default
-	 */
-	return 0;
-}
-
 void lock_vector_lock(void)
 {
 	/* Used to the online set of cpus does not change
@@ -1190,6 +1173,23 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
+	 * nonexistent IRQs are edge default
+	 */
+	return 0;
+}
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
@@ -1926,55 +1926,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
-static void irq_complete_move(unsigned int irq);
-static void ack_ioapic_irq(unsigned int irq)
-{
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-static void ack_ioapic_quirk_irq(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-
 static int ioapic_retrigger_irq(unsigned int irq)
 {
 	send_IPI_self(irq_cfg(irq)->vector);
@@ -2040,13 +1991,61 @@ static void irq_complete_move(unsigned int irq)
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
 
+static void ack_apic_edge(unsigned int irq)
+{
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	ack_APIC_irq();
+}
+
+static void ack_apic_level(unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	irq_complete_move(irq);
+	move_native_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
 	.startup 	= startup_ioapic_irq,
 	.mask	 	= mask_IO_APIC_irq,
 	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_ioapic_irq,
-	.eoi 		= ack_ioapic_quirk_irq,
+	.ack 		= ack_apic_edge,
+	.eoi 		= ack_apic_level,
 #ifdef CONFIG_SMP
 	.set_affinity 	= set_ioapic_affinity_irq,
 #endif
@@ -2094,11 +2093,6 @@ static inline void init_IO_APIC_traps(void)
  * The local APIC irq-chip implementation:
  */
 
-static void ack_lapic_irq(unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
 static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
@@ -2115,6 +2109,11 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
+static void ack_lapic_irq(unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
 static struct irq_chip lapic_chip __read_mostly = {
 	.name		= "local-APIC",
 	.mask		= mask_lapic_irq,
@@ -2636,13 +2635,31 @@ static struct irq_chip msi_chip = {
 	.name		= "PCI-MSI",
 	.unmask		= unmask_msi_irq,
 	.mask		= mask_msi_irq,
-	.ack		= ack_ioapic_irq,
+	.ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.set_affinity	= set_msi_irq_affinity,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
 static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 {
 	unsigned int irq;
@@ -2657,7 +2674,6 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	struct msi_msg msg;
 	int irq, ret;
 
 	unsigned int irq_want;
@@ -2669,17 +2685,11 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 	if (irq == 0)
 		return -1;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
-	}
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
-				      "edge");
+        }
 
 	return 0;
 }
@@ -2738,7 +2748,7 @@ static struct irq_chip ht_irq_chip = {
 	.name		= "PCI-HT",
 	.mask		= mask_ht_irq,
 	.unmask		= unmask_ht_irq,
-	.ack		= ack_ioapic_irq,
+	.ack		= ack_apic_edge,
 #ifdef CONFIG_SMP
 	.set_affinity	= set_ht_irq_affinity,
 #endif
-- 
cgit v1.2.3-55-g7522


From 8ea5371baa82db452a8d93e9977b418d30944e32 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:35 -0700
Subject: x86: ordering functions in io_apic_64.c

try to make functions have the same order between 32-bit and 64-bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 67 ++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 30d2e3811313..07e1e45ee026 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -409,40 +409,6 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 	return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -627,6 +593,39 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
+#define __DO_ACTION(R, ACTION, FINAL)					\
+									\
+{									\
+	int pin;							\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
+									\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
+	for (;;) {							\
+		unsigned int reg;					\
+		if (!entry)						\
+			break;						\
+		pin = entry->pin;					\
+		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
+		reg ACTION;						\
+		io_apic_modify(entry->apic, reg);			\
+		FINAL;							\
+		if (!entry->next)					\
+			break;						\
+		entry = entry->next;					\
+	}								\
+}
 
 #define DO_ACTION(name,R,ACTION, FINAL)					\
 									\
-- 
cgit v1.2.3-55-g7522


From efa2559f65167989f1893cb065e3126d4f13ba60 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:36 -0700
Subject: x86: order variables in io_apic_xx.c

move first_system_vector to apic_64.c.

also add #ifdef CONFIG_INTR_REMAP to prepare 32 bit to use
same file.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    |   5 ++
 arch/x86/kernel/io_apic_32.c |  79 ++++++++++-----------
 arch/x86/kernel/io_apic_64.c | 160 +++++++++++++++++++++++--------------------
 3 files changed, 127 insertions(+), 117 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 94ddb69ae15e..4d7a188025b3 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -33,6 +33,7 @@
 #include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
+#include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
 #include <asm/nmi.h>
@@ -59,6 +60,10 @@ int x2apic_preenabled;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
+int first_system_vector = 0xfe;
+
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+
 /*
  * Debug level, exported for io_apic.c
  */
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 9531ef33362b..3010bdd3352d 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -54,24 +54,22 @@
 
 #define __apicdebuginit(type) static type __init
 
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-int timer_through_8259 __initdata;
-
 /*
  *	Is the SiS APIC rmw bug present ?
  *	-1 = don't know, 0 = no, 1 = yes
  */
 int sis_apic_bug = -1;
 
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
 int first_free_entry;
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+int pin_map_size;
+
 /*
  * # of IRQ routing registers
  */
@@ -93,7 +91,15 @@ int mp_bus_id_to_type[MAX_MP_BUSSES];
 
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
-static int disable_timer_pin_1 __initdata;
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *arg)
+{
+	/* disable IO-APIC */
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
 
 struct irq_cfg;
 struct irq_pin_list;
@@ -268,13 +274,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -465,6 +464,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		entry = entry->next;
 	}
 }
+
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
@@ -677,7 +679,6 @@ void send_IPI_self(int vector)
 #define MAX_PIRQS 8
 static int pirq_entries [MAX_PIRQS];
 static int pirqs_enabled;
-int skip_ioapic_setup;
 
 static int __init ioapic_pirq_setup(char *str)
 {
@@ -981,6 +982,7 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -1621,6 +1623,9 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 static void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
@@ -1998,6 +2003,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
 	unsigned long v;
@@ -2208,6 +2214,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2983,28 +3000,6 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
-static int __init parse_disable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
-
-static int __init parse_enable_timer_pin_1(char *arg)
-{
-	disable_timer_pin_1 = -1;
-	return 0;
-}
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
-
-static int __init parse_noapic(char *arg)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 07e1e45ee026..847aa7987645 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -57,6 +57,47 @@
 
 #define __apicdebuginit(type) static type __init
 
+int ioapic_force;
+
+int sis_apic_bug; /* not actually supported, dummy for compile */
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int first_free_entry;
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+int pin_map_size;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/* I/O APIC entries */
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
+
+
 struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
@@ -228,53 +269,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	return cfg;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-
-int pin_map_size;
-
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -481,12 +475,16 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 		apic = entry->apic;
 		pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
 		 */
 		if (!irq_remapped(irq))
 			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -497,6 +495,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
@@ -533,7 +533,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
 	struct irq_cfg *cfg;
@@ -679,6 +678,10 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /*
  * Saves and masks all the unmasked IO-APIC RTE's
  */
@@ -741,25 +744,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
 	 */
 	restore_IO_APIC_setup();
 }
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1327,8 +1312,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
 		return;
+#endif
 
 	memset(&entry, 0, sizeof(entry));
 
@@ -1601,6 +1588,9 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
@@ -1695,6 +1685,15 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+static int no_timer_check;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1708,6 +1707,9 @@ static int __init timer_irq_works(void)
 	unsigned long t1 = jiffies;
 	unsigned long flags;
 
+	if (no_timer_check)
+		return 1;
+
 	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
@@ -2239,6 +2241,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2286,8 +2299,10 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2305,7 +2320,7 @@ static inline void __init check_timer(void)
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
-		if (!no_timer_check && timer_irq_works()) {
+		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 				enable_8259A_irq(0);
@@ -2314,8 +2329,10 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2391,13 +2408,6 @@ out:
 	local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
-- 
cgit v1.2.3-55-g7522


From e955b5398b660a204854bdff059d050b44090879 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 19 Aug 2008 20:50:37 -0700
Subject: sparseirq: fix lockdep

-tip testing found this lockdep splat:

[    0.000000] Initializing CPU#0
[    0.000000] found new irq_desc for irq 0
[    0.000000] INFO: trying to register non-static key.
[    0.000000] the code is fine but needs lockdep annotation.
[    0.000000] turning off the locking correctness validator.
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.27-rc3-tip-00191-g98ccb89-dirty #1
[    0.000000]  [<c0153c22>] register_lock_class+0x3d2/0x400
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c0154f3a>] __lock_acquire+0x22a/0x5d0
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c0155351>] lock_acquire+0x71/0xa0
[    0.000000]  [<c016d61f>] ? set_irq_chip+0x3f/0x90
[    0.000000]  [<c070f148>] _spin_lock_irqsave+0x58/0x90
[    0.000000]  [<c016d61f>] ? set_irq_chip+0x3f/0x90
[    0.000000]  [<c016d61f>] set_irq_chip+0x3f/0x90
[    0.000000]  [<c016d7e0>] ? handle_level_irq+0x0/0xe0
[    0.000000]  [<c016da1a>] set_irq_chip_and_handler_name+0x1a/0x40
[    0.000000]  [<c0a396c1>] init_ISA_irqs+0x51/0xa0
[    0.000000]  [<c0a4a365>] pre_intr_init_hook+0x25/0x30
[    0.000000]  [<c0a39723>] native_init_IRQ+0x13/0x370
[    0.000000]  [<c015569c>] ? lock_release+0xcc/0x1d0
[    0.000000]  [<c0104d87>] ? mcount_call+0x5/0xa
[    0.000000]  [<c070dc22>] ? __mutex_unlock_slowpath+0x92/0x110
[    0.000000]  [<c070dcad>] ? mutex_unlock+0xd/0x10
[    0.000000]  [<c0135f62>] ? cpu_maps_update_done+0x12/0x20
[    0.000000]  [<c06c6743>] ? register_cpu_notifier+0x23/0x30
[    0.000000]  [<c011e8ae>] init_IRQ+0xe/0x10
[    0.000000]  [<c0a357a5>] start_kernel+0x1c5/0x340
[    0.000000]  [<c0a35280>] ? unknown_bootoption+0x0/0x210
[    0.000000]  [<c0a3506b>] i386_start_kernel+0x6b/0x80
[    0.000000]  =======================
[    0.000000] found new irq_desc for irq 1
[    0.000000] found new irq_desc for irq 2
[    0.000000] found new irq_desc for irq 3

this:

 static void init_one_irq_desc(struct irq_desc *desc)
 {
         memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 #ifdef CONFIG_TRACE_IRQFLAGS
         lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 #endif
 }

should be unconditional.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6d174390f3a0..24c83a3cee4d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,13 +18,10 @@
 
 #include "internals.h"
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
 static struct lock_class_key irq_desc_lock_class;
-#endif
 
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
@@ -75,9 +72,7 @@ static struct irq_desc irq_desc_init = {
 static void init_one_irq_desc(struct irq_desc *desc)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
-#ifdef CONFIG_TRACE_IRQFLAGS
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-#endif
 }
 
 extern int after_bootmem;
-- 
cgit v1.2.3-55-g7522


From d4057bdb6a3bb85dd44f9f39f41eac53696fd637 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:38 -0700
Subject: x86: make headers files the same in io_apic_xx.c

also make no_timer_check to be global on 64 bit, because vmi_32 is using that.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 15 ++++++++++++---
 arch/x86/kernel/io_apic_64.c | 12 +++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3010bdd3352d..48184e126f2c 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -25,28 +25,37 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
+#include <linux/pci.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/pci.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
+#include <linux/jiffies.h>      /* time_after() */
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/dmar.h>
 
+#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 847aa7987645..3c66e5a8e899 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -27,12 +27,15 @@
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -46,14 +49,17 @@
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 #include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #define __apicdebuginit(type) static type __init
 
@@ -1685,7 +1691,7 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
-static int no_timer_check;
+int no_timer_check __initdata;
 
 static int __init notimercheck(char *s)
 {
-- 
cgit v1.2.3-55-g7522


From f876d213a59c363d2492e399cc6c24edd6f3c368 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:39 -0700
Subject: x86: make 64 handle sis_apic_bug like the 32 bit

do we have 64bit system with sis chipset?

[ mingo@elte.hu: nope, the problem chipset was 32-bit only.
                 The code symmetry is good nevertheless. ]

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 3c66e5a8e899..915af9b87aa4 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -65,7 +65,11 @@
 
 int ioapic_force;
 
-int sis_apic_bug; /* not actually supported, dummy for compile */
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
@@ -373,9 +377,11 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
+        if (sis_apic_bug)
+                writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -494,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
-		io_apic_modify(apic, reg);
+		io_apic_modify(apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
 		entry = entry->next;
@@ -624,7 +630,7 @@ static inline void io_apic_sync(unsigned int apic)
 		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
 		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
+		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
 		FINAL;							\
 		if (!entry->next)					\
 			break;						\
@@ -2450,6 +2456,20 @@ void __init setup_IO_APIC(void)
 	check_timer();
 }
 
+/*
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+        if (sis_apic_bug == -1)
+                sis_apic_bug = 0;
+        return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
 struct sysfs_ioapic_data {
 	struct sys_device dev;
 	struct IO_APIC_route_entry entry[0];
-- 
cgit v1.2.3-55-g7522


From aa45f97b1bb40adae1288669e73350907ffae85e Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:40 -0700
Subject: x86: remove ioapic_force

no user left.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    | 1 -
 arch/x86/kernel/io_apic_64.c | 2 --
 include/asm-x86/apic.h       | 2 --
 3 files changed, 5 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 4d7a188025b3..cd860856a0b7 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1813,7 +1813,6 @@ static int __init apic_set_verbosity(char *arg)
 	if (!arg)  {
 #ifdef CONFIG_X86_64
 		skip_ioapic_setup = 0;
-		ioapic_force = 1;
 		return 0;
 #endif
 		return -EINVAL;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 915af9b87aa4..b70fd8232185 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -63,8 +63,6 @@
 
 #define __apicdebuginit(type) static type __init
 
-int ioapic_force;
-
 /*
  *      Is the SiS APIC rmw bug present ?
  *      -1 = don't know, 0 = no, 1 = yes
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d76a0839abe9..2d970f6bc2a1 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -40,8 +40,6 @@ extern void generic_apic_probe(void);
 extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
-extern int ioapic_force;
-
 extern int disable_apic;
 /*
  * Basic functions accessing APICs.
-- 
cgit v1.2.3-55-g7522


From 047c8fdb8718890e3340073b178d0859d0c7f91f Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:41 -0700
Subject: x86: make io_apic_64.c and io_apic_32.c the same

all the same except INTR_REMAPPING related and ioapic io resource.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 213 +++++++++++++-
 arch/x86/kernel/io_apic_64.c | 663 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 829 insertions(+), 47 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 48184e126f2c..3ed36041c81e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -123,7 +123,6 @@ struct irq_cfg {
 	u8 move_in_progress : 1;
 };
 
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
@@ -391,6 +390,38 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
+#ifdef CONFIG_X86_64
+static bool io_apic_level_ack_pending(unsigned int irq)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+		int pin;
+
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		/* Is the remote IRR bit set? */
+		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+			spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return false;
+}
+#endif
+
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -483,17 +514,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
-	cfg = irq_cfg(irq);
-
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	cfg = irq_cfg(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
 	cpus_and(tmp, cfg->domain, mask);
-
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -572,6 +601,54 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
+#define __DO_ACTION(R, ACTION, FINAL)					\
+									\
+{									\
+	int pin;							\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
+									\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
+	for (;;) {							\
+		unsigned int reg;					\
+		if (!entry)						\
+			break;						\
+		pin = entry->pin;					\
+		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
+		reg ACTION;						\
+		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
+		FINAL;							\
+		if (!entry->next)					\
+			break;						\
+		entry = entry->next;					\
+	}								\
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL)					\
+									\
+	static void name##_IO_APIC_irq (unsigned int irq)		\
+	__DO_ACTION(R, ACTION, FINAL)
+
+/* mask = 1 */
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+
+/* mask = 0 */
+DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+
+#else
+
 static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
 {
 	struct irq_cfg *cfg;
@@ -620,6 +697,8 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
 				IO_APIC_REDIR_MASKED);
 }
 
+#endif
+
 static void mask_IO_APIC_irq(unsigned int irq)
 {
 	unsigned long flags;
@@ -1055,6 +1134,17 @@ void unlock_vector_lock(void)
 
 static int __assign_irq_vector(int irq, cpumask_t mask)
 {
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
         static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
         unsigned int old_vector;
         int cpu;
@@ -1095,9 +1185,13 @@ next:
                 }
                 if (unlikely(current_vector == vector))
                         continue;
-		if (vector == SYSCALL_VECTOR)
+#ifdef CONFIG_X86_64
+                if (vector == IA32_SYSCALL_VECTOR)
                         goto next;
-
+#else
+                if (vector == SYSCALL_VECTOR)
+                        goto next;
+#endif
                 for_each_cpu_mask_nr(new_cpu, new_mask)
                         if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                 goto next;
@@ -1184,6 +1278,7 @@ static struct irq_chip ioapic_chip;
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
 
+#ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
 	int apic, idx, pin;
@@ -1200,6 +1295,12 @@ static inline int IO_APIC_irq_trigger(int irq)
 	 */
 	return 0;
 }
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+        return 1;
+}
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
@@ -1212,15 +1313,18 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 		desc = irq_to_desc_alloc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL) {
+	    trigger == IOAPIC_LEVEL)
 		desc->status |= IRQ_LEVEL;
+	else
+		desc->status &= ~IRQ_LEVEL;
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_fasteoi_irq, "fasteoi");
-	} else {
-		desc->status &= ~IRQ_LEVEL;
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					 handle_edge_irq, "edge");
-	}
 }
 
 static int setup_ioapic_entry(int apic, int irq,
@@ -1662,7 +1766,6 @@ static void __init enable_IO_APIC(void)
 			struct IO_APIC_route_entry entry;
 			entry = ioapic_read_entry(apic, pin);
 
-
 			/* If the interrupt line is enabled and in ExtInt mode
 			 * I have found the pin where the i8259 is connected.
 			 */
@@ -2012,6 +2115,60 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+#ifdef CONFIG_X86_64
+static void ack_apic_level(unsigned int irq)
+{
+        int do_unmask_irq = 0;
+
+        irq_complete_move(irq);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+        /* If we are moving the irq we need to mask it */
+        if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+                do_unmask_irq = 1;
+                mask_IO_APIC_irq(irq);
+        }
+#endif
+
+        /*
+         * We must acknowledge the irq before we move it or the acknowledge will
+         * not propagate properly.
+         */
+        ack_APIC_irq();
+
+        /* Now we can move and renable the irq */
+        if (unlikely(do_unmask_irq)) {
+                /* Only migrate the irq if the ack has been received.
+                 *
+                 * On rare occasions the broadcast level triggered ack gets
+                 * delayed going to ioapics, and if we reprogram the
+                 * vector while Remote IRR is still set the irq will never
+                 * fire again.
+                 *
+                 * To prevent this scenario we read the Remote IRR bit
+                 * of the ioapic.  This has two effects.
+                 * - On any sane system the read of the ioapic will
+                 *   flush writes (and acks) going to the ioapic from
+                 *   this cpu.
+                 * - We get to see if the ACK has actually been delivered.
+                 *
+                 * Based on failed experiments of reprogramming the
+                 * ioapic entry from outside of irq context starting
+                 * with masking the ioapic entry and then polling until
+                 * Remote IRR was clear before reprogramming the
+                 * ioapic I don't trust the Remote IRR bit to be
+                 * completey accurate.
+                 *
+                 * However there appears to be no other way to plug
+                 * this race, so if the Remote IRR bit is not
+                 * accurate and is causing problems then it is a hardware bug
+                 * and you can go talk to the chipset vendor about it.
+                 */
+                if (!io_apic_level_ack_pending(irq))
+                        move_masked_irq(irq, desc);
+                unmask_IO_APIC_irq(irq);
+        }
+}
+#else
 atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
@@ -2053,6 +2210,7 @@ static void ack_apic_level(unsigned int irq)
 		spin_unlock(&ioapic_lock);
 	}
 }
+#endif
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
@@ -2224,7 +2382,7 @@ static inline void __init unlock_ExtINT_logic(void)
 }
 
 static int disable_timer_pin_1 __initdata;
-
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
 static int __init parse_disable_timer_pin_1(char *arg)
 {
 	disable_timer_pin_1 = 1;
@@ -2244,9 +2402,9 @@ static inline void __init check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
-	int no_pin1 = 0;
-	unsigned int ver;
 	unsigned long flags;
+	unsigned int ver;
+	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
@@ -2550,6 +2708,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 		cfg_new = irq_cfg(new);
 		if (cfg_new && cfg_new->vector != 0)
 			continue;
+		/* check if need to create one */
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
@@ -2720,6 +2879,32 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 	return 0;
 }
 
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+        unsigned int irq;
+        int ret, sub_handle;
+        struct msi_desc *desc;
+        unsigned int irq_want;
+
+        irq_want = build_irq_for_pci_dev(dev) + 0x100;
+        sub_handle = 0;
+        list_for_each_entry(desc, &dev->msi_list, list) {
+                irq = create_irq_nr(irq_want--);
+                if (irq == 0)
+                        return -1;
+                ret = setup_msi_irq(dev, desc, irq);
+                if (ret < 0)
+                        goto error;
+                sub_handle++;
+        }
+        return 0;
+
+error:
+        destroy_irq(irq);
+        return ret;
+}
+
+
 void arch_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b70fd8232185..940c4167b325 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -94,18 +94,22 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 int skip_ioapic_setup;
 
 static int __init parse_noapic(char *str)
 {
+	/* disable IO-APIC */
 	disable_ioapic_setup();
 	return 0;
 }
 early_param("noapic", parse_noapic);
 
-
 struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
@@ -374,6 +378,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 /*
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
  */
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
@@ -383,6 +389,7 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
+#ifdef CONFIG_X86_64
 static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
@@ -412,6 +419,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 
 	return false;
 }
+#endif
 
 union entry_union {
 	struct { u32 w1, w2; };
@@ -509,7 +517,7 @@ static int assign_irq_vector(int irq, cpumask_t mask);
 
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -519,12 +527,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	if (cpus_empty(tmp))
 		return;
 
+	cfg = irq_cfg(irq);
 	if (assign_irq_vector(irq, mask))
 		return;
 
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
-
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -536,7 +544,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -602,6 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
+#ifdef CONFIG_X86_64
 /*
  * Synchronize the IO-APIC and the CPU by doing
  * a dummy read from the IO-APIC
@@ -647,6 +656,58 @@ DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
 /* mask = 0 */
 DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
 
+#else
+
+static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
+{
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+	unsigned int pin, reg;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		reg &= ~disable;
+		reg |= enable;
+		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
+				IO_APIC_REDIR_LEVEL_TRIGGER);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
+				IO_APIC_REDIR_MASKED);
+}
+
+#endif
+
 static void mask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
@@ -688,6 +749,64 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
 #ifdef CONFIG_INTR_REMAP
 /* I/O APIC RTE contents at the OS boot up */
 static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
@@ -861,18 +980,51 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 	return best_guess;
 }
 
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+#endif
+
 /* ISA interrupts are always polarity zero edge triggered,
  * when listed as conforming in the MP table. */
 
 #define default_ISA_trigger(idx)	(0)
 #define default_ISA_polarity(idx)	(0)
 
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
+
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
 
 #define default_PCI_trigger(idx)	(1)
 #define default_PCI_polarity(idx)	(1)
 
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)	(1)
+#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
+
 static int MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mp_srcbus;
@@ -930,6 +1082,36 @@ static int MPBIOS_trigger(int idx)
 				trigger = default_ISA_trigger(idx);
 			else
 				trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
 			break;
 		case 1: /* edge */
 		{
@@ -967,6 +1149,7 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -988,7 +1171,32 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
+                /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+                if (ioapic_renumber_irq)
+                        irq = ioapic_renumber_irq(apic, irq);
 	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+#endif
+
 	return irq;
 }
 
@@ -1058,8 +1266,13 @@ next:
 		}
 		if (unlikely(current_vector == vector))
 			continue;
+#ifdef CONFIG_X86_64
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
+#else
+		if (vector == SYSCALL_VECTOR)
+			goto next;
+#endif
 		for_each_cpu_mask_nr(new_cpu, new_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
@@ -1140,6 +1353,34 @@ static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 #endif
 
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+        int apic, idx, pin;
+
+        for (apic = 0; apic < nr_ioapics; apic++) {
+                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                        idx = find_irq_entry(apic, pin, mp_INT);
+                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                                return irq_trigger(idx);
+                }
+        }
+        /*
+         * nonexistent IRQs are edge default
+         */
+        return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	return 1;
+}
+#endif
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
@@ -1150,7 +1391,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 	else
 		desc = irq_to_desc_alloc(irq);
 
-	if (trigger)
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		desc->status |= IRQ_LEVEL;
 	else
 		desc->status &= ~IRQ_LEVEL;
@@ -1168,7 +1410,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 		return;
 	}
 #endif
-	if (trigger)
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_fasteoi_irq,
 					      "fasteoi");
@@ -1303,6 +1546,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 
 		irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+                if (multi_timer_check(apic, irq))
+                        continue;
+#endif
 		add_pin_to_irq(irq, apic, pin);
 
 		setup_IO_APIC_irq(apic, pin, irq,
@@ -1360,6 +1607,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
 
@@ -1384,6 +1632,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
+        if (reg_01.bits.version >= 0x20)
+                reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
@@ -1399,11 +1649,27 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
 	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 
-	if (reg_01.bits.version >= 0x10) {
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
 		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
 		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 	}
 
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+	}
+
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
 	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1475,7 +1741,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
 	unsigned int v, ver, maxlvt;
-	unsigned long icr;
+	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1492,11 +1758,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-	v = apic_read(APIC_ARBPRI);
-	printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-		v & APIC_ARBPRI_MASK);
-	v = apic_read(APIC_PROCPRI);
-	printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+		v = apic_read(APIC_ARBPRI);
+		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			v & APIC_ARBPRI_MASK);
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
 
 	v = apic_read(APIC_EOI);
 	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
@@ -1516,8 +1784,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC IRR field:\n");
 	print_APIC_bitfield(APIC_IRR);
 
-	v = apic_read(APIC_ESR);
-	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
 
 	icr = apic_icr_read();
 	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1608,6 +1881,13 @@ void __init enable_IO_APIC(void)
 	int apic;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_32
+	int i;
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+#endif
+
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
 	 */
@@ -1636,6 +1916,10 @@ void __init enable_IO_APIC(void)
 	}
  found_i8259:
 	/* Look to see what if the MP table has reported the ExtINT */
+	/* If we could not find the appropriate pin by looking at the ioapic
+	 * the i8259 probably is not connected the ioapic but give the
+	 * mptable a chance anyway.
+	 */
 	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
 	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
 	/* Trust the MP table if nothing is setup in the hardware */
@@ -1695,6 +1979,122 @@ void disable_IO_APIC(void)
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int apic;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+		return;
+
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+		/* Read the register 0 value */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		old_id = mp_ioapics[apic].mp_apicid;
+
+		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+		}
+
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (check_apicid_used(phys_id_present_map,
+					mp_ioapics[apic].mp_apicid)) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			mp_ioapics[apic].mp_apicid = i;
+		} else {
+			physid_mask_t tmp;
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mp_ioapics[apic].mp_apicid);
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mp_ioapics[apic].mp_apicid)
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].mp_dstapic == old_id)
+					mp_irqs[i].mp_dstapic
+						= mp_ioapics[apic].mp_apicid;
+
+		/*
+		 * Read the right value from the MPC table and
+		 * write it into the ID register.
+		 */
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mp_ioapics[apic].mp_apicid);
+
+		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+		spin_lock_irqsave(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+			printk("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+#endif
+
 int no_timer_check __initdata;
 
 static int __init notimercheck(char *s)
@@ -1780,8 +2180,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
+
 	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
@@ -1791,6 +2193,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
 
 	return 1;
 }
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+        send_IPI_self(irq_cfg(irq)->vector);
+
+        return 1;
+}
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1952,7 +2362,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
 	ack_APIC_irq();
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 
 	me = smp_processor_id();
@@ -2024,6 +2436,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
+#ifdef CONFIG_X86_64
 static void ack_apic_level(unsigned int irq)
 {
 	int do_unmask_irq = 0;
@@ -2076,6 +2489,49 @@ static void ack_apic_level(unsigned int irq)
 		unmask_IO_APIC_irq(irq);
 	}
 }
+#else
+atomic_t irq_mis_count;
+static void ack_apic_level(unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+#endif
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
@@ -2141,20 +2597,24 @@ static inline void init_IO_APIC_traps(void)
 	}
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void mask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -2182,19 +2642,19 @@ static void lapic_register_intr(int irq)
 static void __init setup_nmi(void)
 {
 	/*
- 	 * Dirty trick to enable the NMI watchdog ...
+	 * Dirty trick to enable the NMI watchdog ...
 	 * We put the 8259A master into AEOI mode and
 	 * unmask on all local APICs LVT0 as NMI.
 	 *
 	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
 	 * is from Maciej W. Rozycki - so we do not have to EOI from
 	 * the NMI handler or the timer interrupt.
-	 */ 
-	printk(KERN_INFO "activating NMI Watchdog ...");
+	 */
+	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
 	enable_NMI_through_LVT0();
 
-	printk(" done.\n");
+	apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -2211,12 +2671,17 @@ static inline void __init unlock_ExtINT_logic(void)
 	unsigned char save_control, save_freq_select;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 	apic = find_isa_irq_apic(8, mp_INT);
-	if (pin == -1)
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
 		return;
+	}
 
 	entry0 = ioapic_read_entry(apic, pin);
-
 	clear_IO_APIC_pin(apic, pin);
 
 	memset(&entry1, 0, sizeof(entry1));
@@ -2268,17 +2733,21 @@ int timer_through_8259 __initdata;
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  *
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg(0);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
+	unsigned int ver;
 	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
+        ver = apic_read(APIC_LVR);
+        ver = GET_APIC_VERSION(ver);
+
 	/*
 	 * get/set the timer IRQ vector:
 	 */
@@ -2287,10 +2756,18 @@ static inline void __init check_timer(void)
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.
+	 * wire has to be disabled in the local APIC.  Also
+	 * timer interrupts need to be acknowledged manually in
+	 * the 8259A for the i82489DX when using the NMI
+	 * watchdog as that APIC treats NMIs as level-triggered.
+	 * The AEOI mode will finish them in the 8259A
+	 * automatically.
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
+#ifdef CONFIG_X86_32
+	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2382,6 +2859,9 @@ static inline void __init check_timer(void)
 			    "through the IO-APIC - disabling NMI Watchdog!\n");
 		nmi_watchdog = NMI_NONE;
 	}
+#ifdef CONFIG_X86_32
+	timer_ack = 0;
+#endif
 
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2435,19 +2915,29 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1<<2)
+#define PIC_IRQS	(1 << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
 
+#ifdef CONFIG_X86_32
+	enable_IO_APIC();
+#else
 	/*
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
+#endif
 
 	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+        /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+        if (!acpi_ioapic)
+                setup_ioapic_ids_from_mpc();
+#endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
@@ -3128,7 +3618,93 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 #ifdef CONFIG_ACPI
 
-#define IO_APIC_MAX_ID		0xFE
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
+
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
+	 * supports up to 16 on one shared APIC bus.
+	 *
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (check_apicid_used(apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!check_apicid_used(apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	}
+
+	tmp = apicid_to_cpu_present(apic_id);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id) {
+			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			return -1;
+		}
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.version;
+}
+#endif
 
 int __init io_apic_get_redir_entries (int ioapic)
 {
@@ -3226,6 +3802,7 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
+#ifdef CONFIG_X86_64
 #define IOAPIC_RESOURCE_NAME_SIZE 11
 
 static struct resource *ioapic_resources;
@@ -3261,36 +3838,56 @@ static struct resource * __init ioapic_setup_resources(void)
 
 	return res;
 }
+#endif
 
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	struct resource *ioapic_res;
 	int i;
+#ifdef CONFIG_X86_64
+	struct resource *ioapic_res;
 
 	ioapic_res = ioapic_setup_resources();
+#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+                        if (!ioapic_phys) {
+                                printk(KERN_ERR
+                                       "WARNING: bogus zero IO-APIC "
+                                       "address found in MPTABLE, "
+                                       "disabling IO/APIC support!\n");
+                                smp_found_config = 0;
+                                skip_ioapic_setup = 1;
+                                goto fake_ioapic_page;
+                        }
+#endif
 		} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
 			ioapic_phys = (unsigned long)
 				alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
 		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %016lx (%016lx)\n",
+			    "mapped IOAPIC to %08lx (%08lx)\n",
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
+#ifdef CONFIG_X86_64
 		if (ioapic_res != NULL) {
 			ioapic_res->start = ioapic_phys;
 			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
 			ioapic_res++;
 		}
+#endif
 	}
 }
 
+#ifdef CONFIG_X86_64
 static int __init ioapic_insert_resources(void)
 {
 	int i;
@@ -3313,4 +3910,4 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occured to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-
+#endif
-- 
cgit v1.2.3-55-g7522


From 54168ed7f2a4f3fc2780e645124ae952598da601 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 20 Aug 2008 09:07:45 +0200
Subject: x86: make io_apic_32.c the same as io_apic_64.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 1521 ++++++++++++++++++++++++++++++------------
 1 file changed, 1104 insertions(+), 417 deletions(-)

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 3ed36041c81e..fba6d6ee3480 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -35,7 +35,7 @@
 #include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-#include <linux/jiffies.h>      /* time_after() */
+#include <linux/jiffies.h>	/* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -64,8 +64,8 @@
 #define __apicdebuginit(type) static type __init
 
 /*
- *	Is the SiS APIC rmw bug present ?
- *	-1 = don't know, 0 = no, 1 = yes
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
  */
 int sis_apic_bug = -1;
 
@@ -102,7 +102,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 int skip_ioapic_setup;
 
-static int __init parse_noapic(char *arg)
+static int __init parse_noapic(char *str)
 {
 	/* disable IO-APIC */
 	disable_ioapic_setup();
@@ -188,7 +188,7 @@ static void __init init_work(void *data)
 	irq_cfgx[legacy_count - 1].next = NULL;
 }
 
-#define for_each_irq_cfg(cfg)           \
+#define for_each_irq_cfg(cfg)		\
 	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
 
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
@@ -262,7 +262,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 		irq_cfgx = cfg;
 	cfg->irq = irq;
 	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
 		/* dump the results */
@@ -384,9 +383,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  */
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
-	volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
-	if (sis_apic_bug)
-		writel(reg, &io_apic->index);
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+        if (sis_apic_bug)
+                writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -494,11 +493,20 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 		apic = entry->apic;
 		pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
 		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin *2, reg);
+		io_apic_modify(apic, 0x10 + pin*2, reg);
 		if (!entry->next)
 			break;
 		entry = entry->next;
@@ -513,6 +521,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -529,12 +538,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
+	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	irq_to_desc(irq)->affinity = mask;
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-
 #endif /* CONFIG_SMP */
 
 /*
@@ -699,7 +708,7 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
 
 #endif
 
-static void mask_IO_APIC_irq(unsigned int irq)
+static void mask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
 
@@ -708,7 +717,7 @@ static void mask_IO_APIC_irq(unsigned int irq)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq(unsigned int irq)
+static void unmask_IO_APIC_irq (unsigned int irq)
 {
 	unsigned long flags;
 
@@ -725,14 +734,13 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 		return;
-
 	/*
 	 * Disable it in the IO-APIC irq-routing table:
 	 */
 	ioapic_mask_entry(apic, pin);
 }
 
-static void clear_IO_APIC(void)
+static void clear_IO_APIC (void)
 {
 	int apic, pin;
 
@@ -741,7 +749,7 @@ static void clear_IO_APIC(void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
 void send_IPI_self(int vector)
 {
 	unsigned int cfg;
@@ -756,9 +764,9 @@ void send_IPI_self(int vector)
 	 */
 	apic_write(APIC_ICR, cfg);
 }
-#endif /* !CONFIG_SMP */
-
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
 
+#ifdef CONFIG_X86_32
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -797,6 +805,75 @@ static int __init ioapic_pirq_setup(char *str)
 }
 
 __setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -848,7 +925,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	}
 	if (i < mp_irq_entries) {
 		int apic;
-		for (apic = 0; apic < nr_ioapics; apic++) {
+		for(apic = 0; apic < nr_ioapics; apic++) {
 			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
 				return apic;
 		}
@@ -867,10 +944,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int apic, i, best_guess = -1;
 
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
-		"slot:%d, pin:%d.\n", bus, slot, pin);
+	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		bus, slot, pin);
 	if (test_bit(bus, mp_bus_not_pci)) {
-		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
 	for (i = 0; i < mp_irq_entries; i++) {
@@ -885,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		    !mp_irqs[i].mp_irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
 			if (!(apic || IO_APIC_IRQ(irq)))
 				continue;
@@ -902,6 +979,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 	}
 	return best_guess;
 }
+
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -918,6 +996,7 @@ static int EISA_ELCR(unsigned int irq)
 			"Broken MPtable reports ISA irq %d\n", irq);
 	return 0;
 }
+
 #endif
 
 /* ISA interrupts are always polarity zero edge triggered,
@@ -954,36 +1033,36 @@ static int MPBIOS_polarity(int idx)
 	/*
 	 * Determine IRQ line polarity (high active or low active):
 	 */
-	switch (mp_irqs[idx].mp_irqflag & 3) {
-	case 0: /* conforms, ie. bus-type dependent polarity */
-	{
-		polarity = test_bit(bus, mp_bus_not_pci)?
-			default_ISA_polarity(idx):
-			default_PCI_polarity(idx);
-		break;
-	}
-	case 1: /* high active */
-	{
-		polarity = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
-	case 3: /* low active */
-	{
-		polarity = 1;
-		break;
-	}
-	default: /* invalid */
+	switch (mp_irqs[idx].mp_irqflag & 3)
 	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		polarity = 1;
-		break;
-	}
+		case 0: /* conforms, ie. bus-type dependent polarity */
+			if (test_bit(bus, mp_bus_not_pci))
+				polarity = default_ISA_polarity(idx);
+			else
+				polarity = default_PCI_polarity(idx);
+			break;
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
 	}
 	return polarity;
 }
@@ -996,67 +1075,67 @@ static int MPBIOS_trigger(int idx)
 	/*
 	 * Determine IRQ trigger mode (edge or level sensitive):
 	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
-	case 0: /* conforms, ie. bus-type dependent */
+	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
 	{
-		trigger = test_bit(bus, mp_bus_not_pci)?
-				default_ISA_trigger(idx):
-				default_PCI_trigger(idx);
+		case 0: /* conforms, ie. bus-type dependent */
+			if (test_bit(bus, mp_bus_not_pci))
+				trigger = default_ISA_trigger(idx);
+			else
+				trigger = default_PCI_trigger(idx);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-		switch (mp_bus_id_to_type[bus]) {
-		case MP_BUS_ISA: /* ISA pin */
-		{
-			/* set before the switch */
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
 			break;
-		}
-		case MP_BUS_EISA: /* EISA pin */
+		case 1: /* edge */
 		{
-			trigger = default_EISA_trigger(idx);
+			trigger = 0;
 			break;
 		}
-		case MP_BUS_PCI: /* PCI pin */
+		case 2: /* reserved */
 		{
-			/* set before the switch */
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 1;
 			break;
 		}
-		case MP_BUS_MCA: /* MCA pin */
+		case 3: /* level */
 		{
-			trigger = default_MCA_trigger(idx);
+			trigger = 1;
 			break;
 		}
-		default:
+		default: /* invalid */
 		{
 			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
+			trigger = 0;
 			break;
 		}
 	}
-#endif
-		break;
-	}
-	case 1: /* edge */
-	{
-		trigger = 0;
-		break;
-	}
-	case 2: /* reserved */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 1;
-		break;
-	}
-	case 3: /* level */
-	{
-		trigger = 1;
-		break;
-	}
-	default: /* invalid */
-	{
-		printk(KERN_WARNING "broken BIOS!!\n");
-		trigger = 0;
-		break;
-	}
-	}
 	return trigger;
 }
 
@@ -1082,9 +1161,9 @@ static int pin_2_irq(int idx, int apic, int pin)
 	if (mp_irqs[idx].mp_dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
-	if (test_bit(bus, mp_bus_not_pci))
+	if (test_bit(bus, mp_bus_not_pci)) {
 		irq = mp_irqs[idx].mp_srcbusirq;
-	else {
+	} else {
 		/*
 		 * PCI IRQs are mapped in order
 		 */
@@ -1092,14 +1171,14 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
-
-		/*
-		 * For MPS mode, so far only needed by ES7000 platform
-		 */
-		if (ioapic_renumber_irq)
-			irq = ioapic_renumber_irq(apic, irq);
+                /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+                if (ioapic_renumber_irq)
+                        irq = ioapic_renumber_irq(apic, irq);
 	}
 
+#ifdef CONFIG_X86_32
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
 	 */
@@ -1116,6 +1195,8 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+#endif
+
 	return irq;
 }
 
@@ -1145,74 +1226,70 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-        unsigned int old_vector;
-        int cpu;
-        struct irq_cfg *cfg;
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+	unsigned int old_vector;
+	int cpu;
+	struct irq_cfg *cfg;
 
-        cfg = irq_cfg(irq);
+	cfg = irq_cfg(irq);
 
-        /* Only try and allocate irqs on cpus that are present */
-        cpus_and(mask, mask, cpu_online_map);
+	/* Only try and allocate irqs on cpus that are present */
+	cpus_and(mask, mask, cpu_online_map);
 
-        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-                return -EBUSY;
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
-        old_vector = cfg->vector;
-        if (old_vector) {
-                cpumask_t tmp;
-                cpus_and(tmp, cfg->domain, mask);
-                if (!cpus_empty(tmp))
-                        return 0;
-        }
+	old_vector = cfg->vector;
+	if (old_vector) {
+		cpumask_t tmp;
+		cpus_and(tmp, cfg->domain, mask);
+		if (!cpus_empty(tmp))
+			return 0;
+	}
 
-        for_each_cpu_mask_nr(cpu, mask) {
-                cpumask_t domain, new_mask;
-                int new_cpu;
-                int vector, offset;
+	for_each_cpu_mask_nr(cpu, mask) {
+		cpumask_t domain, new_mask;
+		int new_cpu;
+		int vector, offset;
 
-                domain = vector_allocation_domain(cpu);
-                cpus_and(new_mask, domain, cpu_online_map);
+		domain = vector_allocation_domain(cpu);
+		cpus_and(new_mask, domain, cpu_online_map);
 
-                vector = current_vector;
-                offset = current_offset;
+		vector = current_vector;
+		offset = current_offset;
 next:
-                vector += 8;
-                if (vector >= first_system_vector) {
-                        /* If we run out of vectors on large boxen, must share them. */
-                        offset = (offset + 1) % 8;
-                        vector = FIRST_DEVICE_VECTOR + offset;
-                }
-                if (unlikely(current_vector == vector))
-                        continue;
+		vector += 8;
+		if (vector >= first_system_vector) {
+			/* If we run out of vectors on large boxen, must share them. */
+			offset = (offset + 1) % 8;
+			vector = FIRST_DEVICE_VECTOR + offset;
+		}
+		if (unlikely(current_vector == vector))
+			continue;
 #ifdef CONFIG_X86_64
-                if (vector == IA32_SYSCALL_VECTOR)
-                        goto next;
+		if (vector == IA32_SYSCALL_VECTOR)
+			goto next;
 #else
-                if (vector == SYSCALL_VECTOR)
-                        goto next;
+		if (vector == SYSCALL_VECTOR)
+			goto next;
 #endif
-                for_each_cpu_mask_nr(new_cpu, new_mask)
-                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-                                goto next;
-                /* Found one! */
-                current_vector = vector;
-                current_offset = offset;
-                if (old_vector) {
-                        cfg->move_in_progress = 1;
-                        cfg->old_domain = cfg->domain;
-                }
-		printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector);
-		for_each_cpu_mask_nr(new_cpu, new_mask) {
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-			printk(KERN_CONT " %d ", new_cpu);
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+				goto next;
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (old_vector) {
+			cfg->move_in_progress = 1;
+			cfg->old_domain = cfg->domain;
 		}
-		printk(KERN_CONT "\n");
-                cfg->vector = vector;
-                cfg->domain = domain;
-                return 0;
-        }
-        return -ENOSPC;
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cfg->domain = domain;
+		return 0;
+	}
+	return -ENOSPC;
 }
 
 static int assign_irq_vector(int irq, cpumask_t mask)
@@ -1223,7 +1300,6 @@ static int assign_irq_vector(int irq, cpumask_t mask)
 	spin_lock_irqsave(&vector_lock, flags);
 	err = __assign_irq_vector(irq, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
-
 	return err;
 }
 
@@ -1269,36 +1345,39 @@ void __setup_vector_irq(int cpu)
 		cfg = irq_cfg(irq);
 		if (!cpu_isset(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
-        }
+	}
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
-#define IOAPIC_AUTO	-1
-#define IOAPIC_EDGE	0
-#define IOAPIC_LEVEL	1
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
 
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
-	int apic, idx, pin;
+        int apic, idx, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
-	}
-	/*
-	 * nonexistent IRQs are edge default
-	 */
-	return 0;
+        for (apic = 0; apic < nr_ioapics; apic++) {
+                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                        idx = find_irq_entry(apic, pin, mp_INT);
+                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                                return irq_trigger(idx);
+                }
+        }
+        /*
+         * nonexistent IRQs are edge default
+         */
+        return 0;
 }
 #else
 static inline int IO_APIC_irq_trigger(int irq)
 {
-        return 1;
+	return 1;
 }
 #endif
 
@@ -1318,13 +1397,27 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 	else
 		desc->status &= ~IRQ_LEVEL;
 
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		desc->status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						     "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_fasteoi_irq, "fasteoi");
+					      handle_fasteoi_irq,
+					      "fasteoi");
 	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					 handle_edge_irq, "edge");
+					      handle_edge_irq, "edge");
 }
 
 static int setup_ioapic_entry(int apic, int irq,
@@ -1337,11 +1430,45 @@ static int setup_ioapic_entry(int apic, int irq,
 	 */
 	memset(entry,0,sizeof(*entry));
 
-	entry->delivery_mode = INT_DELIVERY_MODE;
-	entry->dest_mode = INT_DEST_MODE;
-	entry->dest = destination;
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
+	}
 
-	entry->mask = 0;                                /* enable IRQ */
+	entry->mask = 0;				/* enable IRQ */
 	entry->trigger = trigger;
 	entry->polarity = polarity;
 	entry->vector = vector;
@@ -1351,12 +1478,11 @@ static int setup_ioapic_entry(int apic, int irq,
 	 */
 	if (trigger)
 		entry->mask = 1;
-
 	return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
-                              int trigger, int polarity)
+			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
@@ -1420,10 +1546,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 
 		irq = pin_2_irq(idx, apic, pin);
-
+#ifdef CONFIG_X86_32
                 if (multi_timer_check(apic, irq))
                         continue;
-
+#endif
 		add_pin_to_irq(irq, apic, pin);
 
 		setup_IO_APIC_irq(apic, pin, irq,
@@ -1443,6 +1569,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled)
+		return;
+#endif
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1461,7 +1592,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we may have a 8259A-master in AEOI mode ...
 	 */
-	ioapic_register_intr(0, IOAPIC_EDGE);
+	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1501,17 +1632,18 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
-	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
+        if (reg_01.bits.version >= 0x20)
+                reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
+	printk("\n");
 	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
 	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
-	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
 	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
 
 	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
@@ -1548,7 +1680,10 @@ __apicdebuginit(void) print_IO_APIC(void)
 
 		entry = ioapic_read_entry(apic, i);
 
-		printk(KERN_DEBUG " %02x %02X  ", i, entry.dest);
+		printk(KERN_DEBUG " %02x %03X ",
+			i,
+			entry.dest
+		);
 
 		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
 			entry.mask,
@@ -1567,7 +1702,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", i);
+		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -1614,8 +1749,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(v));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1624,7 +1758,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	v = apic_read(APIC_TASKPRI);
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
 		v = apic_read(APIC_ARBPRI);
 		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
 			v & APIC_ARBPRI_MASK);
@@ -1650,9 +1784,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC IRR field:\n");
 	print_APIC_bitfield(APIC_IRR);
 
-	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
 			apic_write(APIC_ESR, 0);
+
 		v = apic_read(APIC_ESR);
 		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 	}
@@ -1710,11 +1845,11 @@ __apicdebuginit(void) print_PIC(void)
 	v = inb(0xa0) << 8 | inb(0x20);
 	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
 
-	outb(0x0b, 0xa0);
-	outb(0x0b, 0x20);
+	outb(0x0b,0xa0);
+	outb(0x0b,0x20);
 	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a, 0xa0);
-	outb(0x0a, 0x20);
+	outb(0x0a,0xa0);
+	outb(0x0a,0x20);
 
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 
@@ -1739,16 +1874,19 @@ fs_initcall(print_all_ICs);
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
-static void __init enable_IO_APIC(void)
+void __init enable_IO_APIC(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	int i8259_apic, i8259_pin;
-	int i, apic;
+	int apic;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_32
+	int i;
 	if (!pirqs_enabled)
 		for (i = 0; i < MAX_PIRQS; i++)
 			pirq_entries[i] = -1;
+#endif
 
 	/*
 	 * The number of IO-APIC IRQ registers (== #pins):
@@ -1759,7 +1897,7 @@ static void __init enable_IO_APIC(void)
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 	}
-	for (apic = 0; apic < nr_ioapics; apic++) {
+	for(apic = 0; apic < nr_ioapics; apic++) {
 		int pin;
 		/* See if any of the pins is in ExtINT mode */
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -1830,16 +1968,18 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest	      = read_apic_id();
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
 		 */
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
+
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
 /*
  * function to set the IO-APIC physical IDs based on the
  * values stored in the MPC table.
@@ -1940,8 +2080,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
 
 		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic, 0, reg_00.raw);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
@@ -1955,6 +2093,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
+#endif
 
 int no_timer_check __initdata;
 
@@ -1994,9 +2133,10 @@ static int __init timer_irq_works(void)
 	 * might have cached one ExtINT interrupt.  Finally, at
 	 * least one tick may be lost due to delays.
 	 */
+
+	/* jiffies wrap? */
 	if (time_after(jiffies, t1 + 4))
 		return 1;
-
 	return 0;
 }
 
@@ -2014,8 +2154,6 @@ static int __init timer_irq_works(void)
  */
 
 /*
- * Startup quirk:
- *
  * Starting up a edge-triggered IO-APIC interrupt is
  * nasty - we need to make sure that we get the edge.
  * If it is already asserted for some reason, we need
@@ -2023,9 +2161,8 @@ static int __init timer_irq_works(void)
  *
  * This is not complete - we should be able to fake
  * an edge even if it isn't on the 8259A...
- *
- * (We do this for level-triggered IRQs too - it cannot hurt.)
  */
+
 static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
@@ -2043,70 +2180,254 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-	send_IPI_self(irq_cfg(irq)->vector);
+
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
 }
-
-#ifdef CONFIG_SMP
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
 {
-	unsigned vector, me;
-	ack_APIC_irq();
-	irq_enter();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
+        send_IPI_self(irq_cfg(irq)->vector);
 
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
+        return 1;
+}
+#endif
 
-		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
 
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
-			goto unlock;
+#ifdef CONFIG_SMP
 
-		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
-unlock:
-		spin_unlock(&desc->lock);
-	}
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
 
-	irq_exit();
-}
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 
-static void irq_complete_move(unsigned int irq)
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned vector, me;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte;
+	unsigned int dest;
+	unsigned long flags;
 
-	if (likely(!cfg->move_in_progress))
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
 		return;
 
-	vector = ~get_irq_regs()->orig_ax;
-	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
+	if (get_irte(irq, &irte))
+		return;
 
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc->affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+	 	 * Interrupt in progress. Migrating irq now will change the
+		 * vector information in the IO-APIC RTE and that will confuse
+		 * the EOI broadcast performed by cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everthing is clear. we have right of way */
+	migrate_ioapic_irq(irq, desc->pending_mask);
+
+	ret = 0;
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	unsigned int irq;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(irq, desc) {
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq, desc->pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_ax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 }
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -2118,55 +2439,55 @@ static void ack_apic_edge(unsigned int irq)
 #ifdef CONFIG_X86_64
 static void ack_apic_level(unsigned int irq)
 {
-        int do_unmask_irq = 0;
+	int do_unmask_irq = 0;
 
-        irq_complete_move(irq);
+	irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-        /* If we are moving the irq we need to mask it */
-        if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
-                do_unmask_irq = 1;
-                mask_IO_APIC_irq(irq);
-        }
+	/* If we are moving the irq we need to mask it */
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+		do_unmask_irq = 1;
+		mask_IO_APIC_irq(irq);
+	}
 #endif
 
-        /*
-         * We must acknowledge the irq before we move it or the acknowledge will
-         * not propagate properly.
-         */
-        ack_APIC_irq();
-
-        /* Now we can move and renable the irq */
-        if (unlikely(do_unmask_irq)) {
-                /* Only migrate the irq if the ack has been received.
-                 *
-                 * On rare occasions the broadcast level triggered ack gets
-                 * delayed going to ioapics, and if we reprogram the
-                 * vector while Remote IRR is still set the irq will never
-                 * fire again.
-                 *
-                 * To prevent this scenario we read the Remote IRR bit
-                 * of the ioapic.  This has two effects.
-                 * - On any sane system the read of the ioapic will
-                 *   flush writes (and acks) going to the ioapic from
-                 *   this cpu.
-                 * - We get to see if the ACK has actually been delivered.
-                 *
-                 * Based on failed experiments of reprogramming the
-                 * ioapic entry from outside of irq context starting
-                 * with masking the ioapic entry and then polling until
-                 * Remote IRR was clear before reprogramming the
-                 * ioapic I don't trust the Remote IRR bit to be
-                 * completey accurate.
-                 *
-                 * However there appears to be no other way to plug
-                 * this race, so if the Remote IRR bit is not
-                 * accurate and is causing problems then it is a hardware bug
-                 * and you can go talk to the chipset vendor about it.
-                 */
-                if (!io_apic_level_ack_pending(irq))
-                        move_masked_irq(irq, desc);
-                unmask_IO_APIC_irq(irq);
-        }
+	/*
+	 * We must acknowledge the irq before we move it or the acknowledge will
+	 * not propagate properly.
+	 */
+	ack_APIC_irq();
+
+	/* Now we can move and renable the irq */
+	if (unlikely(do_unmask_irq)) {
+		/* Only migrate the irq if the ack has been received.
+		 *
+		 * On rare occasions the broadcast level triggered ack gets
+		 * delayed going to ioapics, and if we reprogram the
+		 * vector while Remote IRR is still set the irq will never
+		 * fire again.
+		 *
+		 * To prevent this scenario we read the Remote IRR bit
+		 * of the ioapic.  This has two effects.
+		 * - On any sane system the read of the ioapic will
+		 *   flush writes (and acks) going to the ioapic from
+		 *   this cpu.
+		 * - We get to see if the ACK has actually been delivered.
+		 *
+		 * Based on failed experiments of reprogramming the
+		 * ioapic entry from outside of irq context starting
+		 * with masking the ioapic entry and then polling until
+		 * Remote IRR was clear before reprogramming the
+		 * ioapic I don't trust the Remote IRR bit to be
+		 * completey accurate.
+		 *
+		 * However there appears to be no other way to plug
+		 * this race, so if the Remote IRR bit is not
+		 * accurate and is causing problems then it is a hardware bug
+		 * and you can go talk to the chipset vendor about it.
+		 */
+		if (!io_apic_level_ack_pending(irq))
+			move_masked_irq(irq);
+		unmask_IO_APIC_irq(irq);
+	}
 }
 #else
 atomic_t irq_mis_count;
@@ -2177,25 +2498,25 @@ static void ack_apic_level(unsigned int irq)
 
 	irq_complete_move(irq);
 	move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
 	i = irq_cfg(irq)->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
@@ -2225,6 +2546,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name 		= "IR-IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_x2apic_edge,
+	.eoi 		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
 
 static inline void init_IO_APIC_traps(void)
 {
@@ -2282,7 +2617,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq(unsigned int irq)
+static void ack_lapic_irq (unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2383,12 +2718,12 @@ static inline void __init unlock_ExtINT_logic(void)
 
 static int disable_timer_pin_1 __initdata;
 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init parse_disable_timer_pin_1(char *arg)
+static int __init disable_timer_pin_setup(char *arg)
 {
 	disable_timer_pin_1 = 1;
 	return 0;
 }
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
 
 int timer_through_8259 __initdata;
 
@@ -2397,6 +2732,8 @@ int timer_through_8259 __initdata;
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
@@ -2408,8 +2745,8 @@ static inline void __init check_timer(void)
 
 	local_irq_save(flags);
 
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
+        ver = apic_read(APIC_LVR);
+        ver = GET_APIC_VERSION(ver);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -2428,7 +2765,9 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
+#ifdef CONFIG_X86_32
 	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2447,6 +2786,10 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2473,6 +2816,10 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2512,7 +2859,9 @@ static inline void __init check_timer(void)
 			    "through the IO-APIC - disabling NMI Watchdog!\n");
 		nmi_watchdog = NMI_NONE;
 	}
+#ifdef CONFIG_X86_32
 	timer_ack = 0;
+#endif
 
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2570,17 +2919,25 @@ out:
 
 void __init setup_IO_APIC(void)
 {
+
+#ifdef CONFIG_X86_32
 	enable_IO_APIC();
+#else
+	/*
+	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+	 */
+#endif
 
 	io_apic_irqs = ~PIC_IRQS;
 
-	printk("ENABLING IO-APIC IRQs\n");
-
-	/*
-	 * Set up IO-APIC IRQ routing.
-	 */
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
+	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+        /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+        if (!acpi_ioapic)
+                setup_ioapic_ids_from_mpc();
+#endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
@@ -2588,15 +2945,15 @@ void __init setup_IO_APIC(void)
 }
 
 /*
- *	Called after all the initialization is done. If we didnt find any
- *	APIC bugs then we can allow the modify fast path
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
  */
 
 static int __init io_apic_bug_finalize(void)
 {
-	if (sis_apic_bug == -1)
-		sis_apic_bug = 0;
-	return 0;
+        if (sis_apic_bug == -1)
+                sis_apic_bug = 0;
+        return 0;
 }
 
 late_initcall(io_apic_bug_finalize);
@@ -2605,7 +2962,7 @@ struct sysfs_ioapic_data {
 	struct sys_device dev;
 	struct IO_APIC_route_entry entry[0];
 };
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 
 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 {
@@ -2615,8 +2972,8 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	data = container_of(dev, struct sysfs_ioapic_data, dev);
 	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		entry[i] = ioapic_read_entry(dev->id, i);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
+		*entry = ioapic_read_entry(dev->id, i);
 
 	return 0;
 }
@@ -2653,14 +3010,14 @@ static struct sysdev_class ioapic_sysdev_class = {
 
 static int __init ioapic_init_sysfs(void)
 {
-	struct sys_device *dev;
-	int i, size, error = 0;
+	struct sys_device * dev;
+	int i, size, error;
 
 	error = sysdev_class_register(&ioapic_sysdev_class);
 	if (error)
 		return error;
 
-	for (i = 0; i < nr_ioapics; i++) {
+	for (i = 0; i < nr_ioapics; i++ ) {
 		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
 		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
@@ -2691,18 +3048,18 @@ device_initcall(ioapic_init_sysfs);
 unsigned int create_irq_nr(unsigned int irq_want)
 {
 	/* Allocate an unused irq */
-	unsigned int irq, new;
+	unsigned int irq;
+	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
-	/* only can use bus/dev/fn.. when per_cpu vector is used */
 	irq_want = nr_irqs - 1;
 #endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = (nr_irqs - 1); new > 0; new--) {
+	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
 		cfg_new = irq_cfg(new);
@@ -2725,7 +3082,14 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
-	return create_irq_nr(nr_irqs - 1);
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
 }
 
 void destroy_irq(unsigned int irq)
@@ -2734,6 +3098,9 @@ void destroy_irq(unsigned int irq)
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -2759,25 +3126,54 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
-	msg->address_hi = MSI_ADDR_BASE_HI;
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((INT_DEST_MODE == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((INT_DELIVERY_MODE != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((INT_DELIVERY_MODE != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset (&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->address_lo =
+			MSI_ADDR_BASE_LO |
+			((INT_DEST_MODE == 0) ?
+				MSI_ADDR_DEST_MODE_PHYSICAL:
+				MSI_ADDR_DEST_MODE_LOGICAL) |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_ADDR_REDIRECTION_CPU:
+				MSI_ADDR_REDIRECTION_LOWPRI) |
+			MSI_ADDR_DEST_ID(dest);
 
+		msg->data =
+			MSI_DATA_TRIGGER_EDGE |
+			MSI_DATA_LEVEL_ASSERT |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_DATA_DELIVERY_FIXED:
+				MSI_DATA_DELIVERY_LOWPRI) |
+			MSI_DATA_VECTOR(cfg->vector);
+	}
 	return err;
 }
 
@@ -2788,6 +3184,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2808,8 +3205,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	irq_to_desc(irq)->affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2827,6 +3277,45 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		        pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
 
 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
@@ -2840,7 +3329,17 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	set_irq_msi(irq, desc);
 	write_msi_msg(irq, &msg);
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
 	return 0;
 }
@@ -2859,59 +3358,164 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
 
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	int irq, ret;
-
+	unsigned int irq;
+	int ret;
 	unsigned int irq_want;
 
 	irq_want = build_irq_for_pci_dev(dev) + 0x100;
 
 	irq = create_irq_nr(irq_want);
-
 	if (irq == 0)
 		return -1;
 
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
 	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
-        }
-
+	}
 	return 0;
+
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
 }
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-        unsigned int irq;
-        int ret, sub_handle;
-        struct msi_desc *desc;
-        unsigned int irq_want;
-
-        irq_want = build_irq_for_pci_dev(dev) + 0x100;
-        sub_handle = 0;
-        list_for_each_entry(desc, &dev->msi_list, list) {
-                irq = create_irq_nr(irq_want--);
-                if (irq == 0)
-                        return -1;
-                ret = setup_msi_irq(dev, desc, irq);
-                if (ret < 0)
-                        goto error;
-                sub_handle++;
-        }
-        return 0;
+	unsigned int irq;
+	int ret, sub_handle;
+	struct msi_desc *desc;
+	unsigned int irq_want;
+
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
+	return 0;
 
 error:
-        destroy_irq(irq);
-        return ret;
+	destroy_irq(irq);
+	return ret;
 }
 
-
 void arch_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
 }
 
-#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
 
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
  */
@@ -2938,6 +3542,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
+	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -2951,7 +3556,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	irq_to_desc(irq)->affinity = mask;
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
 }
 #endif
 
@@ -2974,7 +3580,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if ( !err) {
+	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
@@ -3007,11 +3613,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 #endif /* CONFIG_HT_IRQ */
 
 /* --------------------------------------------------------------------------
-			ACPI-based IOAPIC Configuration
+                          ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
 #ifdef CONFIG_ACPI
 
+#ifdef CONFIG_X86_32
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
@@ -3086,7 +3693,6 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	return apic_id;
 }
 
-
 int __init io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
@@ -3098,9 +3704,9 @@ int __init io_apic_get_version(int ioapic)
 
 	return reg_01.bits.version;
 }
+#endif
 
-
-int __init io_apic_get_redir_entries(int ioapic)
+int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3113,10 +3719,10 @@ int __init io_apic_get_redir_entries(int ioapic)
 }
 
 
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
 	if (!IO_APIC_IRQ(irq)) {
-		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
@@ -3132,6 +3738,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int po
 	return 0;
 }
 
+
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
 	int i;
@@ -3163,7 +3770,6 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3184,43 +3790,124 @@ void __init setup_ioapic_dest(void)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
-			else {
-				desc = irq_to_desc(irq);
+#ifdef CONFIG_INTR_REMAP
+			else if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+			else
 				set_ioapic_affinity_irq(irq, TARGET_CPUS);
-			}
 		}
 
 	}
 }
 #endif
 
+#ifdef CONFIG_X86_64
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+#endif
+
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	int i;
+#ifdef CONFIG_X86_64
+	struct resource *ioapic_res;
 
+	ioapic_res = ioapic_setup_resources();
+#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-			if (!ioapic_phys) {
-				printk(KERN_ERR
-				       "WARNING: bogus zero IO-APIC "
-				       "address found in MPTABLE, "
-				       "disabling IO/APIC support!\n");
-				smp_found_config = 0;
-				skip_ioapic_setup = 1;
-				goto fake_ioapic_page;
-			}
+#ifdef CONFIG_X86_32
+                        if (!ioapic_phys) {
+                                printk(KERN_ERR
+                                       "WARNING: bogus zero IO-APIC "
+                                       "address found in MPTABLE, "
+                                       "disabling IO/APIC support!\n");
+                                smp_found_config = 0;
+                                skip_ioapic_setup = 1;
+                                goto fake_ioapic_page;
+                        }
+#endif
 		} else {
+#ifdef CONFIG_X86_32
 fake_ioapic_page:
+#endif
 			ioapic_phys = (unsigned long)
-				      alloc_bootmem_pages(PAGE_SIZE);
+				alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
-		printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-		       __fix_to_virt(idx), ioapic_phys);
+		apic_printk(APIC_VERBOSE,
+			    "mapped IOAPIC to %08lx (%08lx)\n",
+			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
+
+#ifdef CONFIG_X86_64
+		if (ioapic_res != NULL) {
+			ioapic_res->start = ioapic_phys;
+			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+			ioapic_res++;
+		}
+#endif
 	}
 }
 
+#ifdef CONFIG_X86_64
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk(KERN_ERR
+		       "IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
-- 
cgit v1.2.3-55-g7522


From 26d347c2c035b1f4c5b3c5094f3046db9ec920f5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:42 -0700
Subject: rename io_apic_64.c and io_apic_32.c to io_apic.c

The two files are now line by line equal. (sans a printk)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile     |    2 +-
 arch/x86/kernel/io_apic.c    | 3913 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/io_apic_32.c | 3913 ------------------------------------------
 arch/x86/kernel/io_apic_64.c | 3913 ------------------------------------------
 4 files changed, 3914 insertions(+), 7827 deletions(-)
 create mode 100644 arch/x86/kernel/io_apic.c
 delete mode 100644 arch/x86/kernel/io_apic_32.c
 delete mode 100644 arch/x86/kernel/io_apic_64.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..7264f9c3af8f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
new file mode 100644
index 000000000000..fba6d6ee3480
--- /dev/null
+++ b/arch/x86/kernel/io_apic.c
@@ -0,0 +1,3913 @@
+/*
+ *	Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *	Many thanks to Stig Venaas for trying out countless experimental
+ *	patches and reporting/debugging problems patiently!
+ *
+ *	(c) 1999, Multiple IO-APIC support, developed by
+ *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *	further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *	and Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively
+ *	Paul Diefenbaugh	:	Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/dmar.h>
+
+#include <asm/idle.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>
+#include <asm/setup.h>
+#include <asm/irq_remapping.h>
+
+#include <mach_ipi.h>
+#include <mach_apic.h>
+#include <mach_apicdef.h>
+
+#define __apicdebuginit(type) static type __init
+
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int first_free_entry;
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+int pin_map_size;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/* I/O APIC entries */
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+	/* disable IO-APIC */
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_cfg;
+struct irq_pin_list;
+struct irq_cfg {
+	unsigned int irq;
+	struct irq_cfg *next;
+	struct irq_pin_list *irq_2_pin;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
+	u8 vector;
+	u8 move_in_progress : 1;
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
+
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
+static void __init init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_cfg *cfg;
+	int legacy_count;
+	int i;
+
+	cfg = *da->name;
+
+	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	for (i = legacy_count; i < *da->nr; i++)
+		init_one_irq_cfg(&cfg[i]);
+
+	for (i = 1; i < *da->nr; i++)
+		cfg[i-1].next = &cfg[i];
+
+	irq_cfgx_free = &irq_cfgx[legacy_count];
+	irq_cfgx[legacy_count - 1].next = NULL;
+}
+
+#define for_each_irq_cfg(cfg)		\
+	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg;
+
+	cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		cfg = cfg->next;
+	}
+
+	return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+	struct irq_cfg *cfg, *cfg_pri;
+	int i;
+	int count = 0;
+
+	cfg_pri = cfg = irq_cfgx;
+	while (cfg) {
+		if (cfg->irq == irq)
+			return cfg;
+
+		cfg_pri = cfg;
+		cfg = cfg->next;
+		count++;
+	}
+
+	if (!irq_cfgx_free) {
+		unsigned long phys;
+		unsigned long total_bytes;
+		/*
+		 *  we run out of pre-allocate ones, allocate more
+		 */
+		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+		if (after_bootmem)
+			cfg = kzalloc(total_bytes, GFP_ATOMIC);
+		else
+			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+		if (!cfg)
+			panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+		phys = __pa(cfg);
+		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+		for (i = 0; i < nr_irq_cfg; i++)
+			init_one_irq_cfg(&cfg[i]);
+
+		for (i = 1; i < nr_irq_cfg; i++)
+			cfg[i-1].next = &cfg[i];
+
+		irq_cfgx_free = cfg;
+	}
+
+	cfg = irq_cfgx_free;
+	irq_cfgx_free = irq_cfgx_free->next;
+	cfg->next = NULL;
+	if (cfg_pri)
+		cfg_pri->next = cfg;
+	else
+		irq_cfgx = cfg;
+	cfg->irq = irq;
+	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+	{
+		/* dump the results */
+		struct irq_cfg *cfg;
+		unsigned long phys;
+		unsigned long bytes = sizeof(struct irq_cfg);
+
+		printk(KERN_DEBUG "=========================== %d\n", irq);
+		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+		for_each_irq_cfg(cfg) {
+			phys = __pa(cfg);
+			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+		}
+		printk(KERN_DEBUG "===========================\n");
+	}
+#endif
+	return cfg;
+}
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+	struct dyn_array *da = data;
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = *da->name;
+
+	for (i = 1; i < *da->nr; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+	struct irq_pin_list *pin;
+	int i;
+
+	pin = irq_2_pin_ptr;
+
+	if (pin) {
+		irq_2_pin_ptr = pin->next;
+		pin->next = NULL;
+		return pin;
+	}
+
+	/*
+	 *  we run out of pre-allocate ones, allocate more
+	 */
+	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+	if (after_bootmem)
+		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+				 GFP_ATOMIC);
+	else
+		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+				nr_irq_2_pin, PAGE_SIZE, 0);
+
+	if (!pin)
+		panic("can not get more irq_2_pin\n");
+
+	for (i = 1; i < nr_irq_2_pin; i++)
+		pin[i-1].next = &pin[i];
+
+	irq_2_pin_ptr = pin->next;
+	pin->next = NULL;
+
+	return pin;
+}
+
+struct io_apic {
+	unsigned int index;
+	unsigned int unused[3];
+	unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(reg, &io_apic->index);
+	return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(reg, &io_apic->index);
+	writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+        if (sis_apic_bug)
+                writel(reg, &io_apic->index);
+	writel(value, &io_apic->data);
+}
+
+#ifdef CONFIG_X86_64
+static bool io_apic_level_ack_pending(unsigned int irq)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+		int pin;
+
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		/* Is the remote IRR bit set? */
+		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+			spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return false;
+}
+#endif
+
+union entry_union {
+	struct { u32 w1, w2; };
+	struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+	unsigned long flags;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	return eu.entry;
+}
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	union entry_union eu;
+	eu.entry = e;
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__ioapic_write_entry(apic, pin, e);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+	unsigned long flags;
+	union entry_union eu = { .entry.mask = 1 };
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifdef CONFIG_SMP
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	int apic, pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+		io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+static int assign_irq_vector(int irq, cpumask_t mask);
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	cfg = irq_cfg(irq);
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+	/*
+	 * Only the high 8 bits are valid.
+	 */
+	dest = SET_APIC_LOGICAL_ID(dest);
+
+	desc = irq_to_desc(irq);
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	desc->affinity = mask;
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	/* first time to refer irq_cfg, so with new */
+	cfg = irq_cfg_alloc(irq);
+	entry = cfg->irq_2_pin;
+	if (!entry) {
+		entry = get_one_free_irq_2_pin();
+		cfg->irq_2_pin = entry;
+		entry->apic = apic;
+		entry->pin = pin;
+		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+		return;
+	}
+
+	while (entry->next) {
+		/* not again, please */
+		if (entry->apic == apic && entry->pin == pin)
+			return;
+
+		entry = entry->next;
+	}
+
+	entry->next = get_one_free_irq_2_pin();
+	entry = entry->next;
+	entry->apic = apic;
+	entry->pin = pin;
+	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+}
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+				      int oldapic, int oldpin,
+				      int newapic, int newpin)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_pin_list *entry = cfg->irq_2_pin;
+	int replaced = 0;
+
+	while (entry) {
+		if (entry->apic == oldapic && entry->pin == oldpin) {
+			entry->apic = newapic;
+			entry->pin = newpin;
+			replaced = 1;
+			/* every one is different, right? */
+			break;
+		}
+		entry = entry->next;
+	}
+
+	/* why? call replace before add? */
+	if (!replaced)
+		add_pin_to_irq(irq, newapic, newpin);
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
+#define __DO_ACTION(R, ACTION, FINAL)					\
+									\
+{									\
+	int pin;							\
+	struct irq_cfg *cfg;						\
+	struct irq_pin_list *entry;					\
+									\
+	cfg = irq_cfg(irq);						\
+	entry = cfg->irq_2_pin;						\
+	for (;;) {							\
+		unsigned int reg;					\
+		if (!entry)						\
+			break;						\
+		pin = entry->pin;					\
+		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
+		reg ACTION;						\
+		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
+		FINAL;							\
+		if (!entry->next)					\
+			break;						\
+		entry = entry->next;					\
+	}								\
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL)					\
+									\
+	static void name##_IO_APIC_irq (unsigned int irq)		\
+	__DO_ACTION(R, ACTION, FINAL)
+
+/* mask = 1 */
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+
+/* mask = 0 */
+DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+
+#else
+
+static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
+{
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+	unsigned int pin, reg;
+
+	cfg = irq_cfg(irq);
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		if (!entry)
+			break;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		reg &= ~disable;
+		reg |= enable;
+		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
+				IO_APIC_REDIR_LEVEL_TRIGGER);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
+				IO_APIC_REDIR_MASKED);
+}
+
+#endif
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__mask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__unmask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+	struct IO_APIC_route_entry entry;
+
+	/* Check delivery_mode to be sure we're not clearing an SMI pin */
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.delivery_mode == dest_SMI)
+		return;
+	/*
+	 * Disable it in the IO-APIC irq-routing table:
+	 */
+	ioapic_mask_entry(apic, pin);
+}
+
+static void clear_IO_APIC (void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			clear_IO_APIC_pin(apic, pin);
+}
+
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+#endif
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mp_irqtype == type &&
+		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].mp_dstirq == pin)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mp_srcbus;
+
+		if (test_bit(lbus, mp_bus_not_pci) &&
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
+
+			return mp_irqs[i].mp_dstirq;
+	}
+	return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mp_srcbus;
+
+		if (test_bit(lbus, mp_bus_not_pci) &&
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
+			break;
+	}
+	if (i < mp_irq_entries) {
+		int apic;
+		for(apic = 0; apic < nr_ioapics; apic++) {
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+				return apic;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mp_srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+				break;
+
+		if (!test_bit(lbus, mp_bus_not_pci) &&
+		    !mp_irqs[i].mp_irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+				return irq;
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0)
+				best_guess = irq;
+		}
+	}
+	return best_guess;
+}
+
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)	(0)
+#define default_ISA_polarity(idx)	(0)
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)	(1)
+#define default_PCI_polarity(idx)	(1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)	(1)
+#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
+
+static int MPBIOS_polarity(int idx)
+{
+	int bus = mp_irqs[idx].mp_srcbus;
+	int polarity;
+
+	/*
+	 * Determine IRQ line polarity (high active or low active):
+	 */
+	switch (mp_irqs[idx].mp_irqflag & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent polarity */
+			if (test_bit(bus, mp_bus_not_pci))
+				polarity = default_ISA_polarity(idx);
+			else
+				polarity = default_PCI_polarity(idx);
+			break;
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+	}
+	return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+	int bus = mp_irqs[idx].mp_srcbus;
+	int trigger;
+
+	/*
+	 * Determine IRQ trigger mode (edge or level sensitive):
+	 */
+	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent */
+			if (test_bit(bus, mp_bus_not_pci))
+				trigger = default_ISA_trigger(idx);
+			else
+				trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
+			break;
+		case 1: /* edge */
+		{
+			trigger = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 1;
+			break;
+		}
+		case 3: /* level */
+		{
+			trigger = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 0;
+			break;
+		}
+	}
+	return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+	return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+	return MPBIOS_trigger(idx);
+}
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+static int pin_2_irq(int idx, int apic, int pin)
+{
+	int irq, i;
+	int bus = mp_irqs[idx].mp_srcbus;
+
+	/*
+	 * Debugging check, we are in big trouble if this message pops up!
+	 */
+	if (mp_irqs[idx].mp_dstirq != pin)
+		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+	if (test_bit(bus, mp_bus_not_pci)) {
+		irq = mp_irqs[idx].mp_srcbusirq;
+	} else {
+		/*
+		 * PCI IRQs are mapped in order
+		 */
+		i = irq = 0;
+		while (i < apic)
+			irq += nr_ioapic_registers[i++];
+		irq += pin;
+                /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+                if (ioapic_renumber_irq)
+                        irq = ioapic_renumber_irq(apic, irq);
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+#endif
+
+	return irq;
+}
+
+void lock_vector_lock(void)
+{
+	/* Used to the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+	spin_unlock(&vector_lock);
+}
+
+static int __assign_irq_vector(int irq, cpumask_t mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+	unsigned int old_vector;
+	int cpu;
+	struct irq_cfg *cfg;
+
+	cfg = irq_cfg(irq);
+
+	/* Only try and allocate irqs on cpus that are present */
+	cpus_and(mask, mask, cpu_online_map);
+
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
+
+	old_vector = cfg->vector;
+	if (old_vector) {
+		cpumask_t tmp;
+		cpus_and(tmp, cfg->domain, mask);
+		if (!cpus_empty(tmp))
+			return 0;
+	}
+
+	for_each_cpu_mask_nr(cpu, mask) {
+		cpumask_t domain, new_mask;
+		int new_cpu;
+		int vector, offset;
+
+		domain = vector_allocation_domain(cpu);
+		cpus_and(new_mask, domain, cpu_online_map);
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 8;
+		if (vector >= first_system_vector) {
+			/* If we run out of vectors on large boxen, must share them. */
+			offset = (offset + 1) % 8;
+			vector = FIRST_DEVICE_VECTOR + offset;
+		}
+		if (unlikely(current_vector == vector))
+			continue;
+#ifdef CONFIG_X86_64
+		if (vector == IA32_SYSCALL_VECTOR)
+			goto next;
+#else
+		if (vector == SYSCALL_VECTOR)
+			goto next;
+#endif
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+				goto next;
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (old_vector) {
+			cfg->move_in_progress = 1;
+			cfg->old_domain = cfg->domain;
+		}
+		for_each_cpu_mask_nr(new_cpu, new_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cfg->domain = domain;
+		return 0;
+	}
+	return -ENOSPC;
+}
+
+static int assign_irq_vector(int irq, cpumask_t mask)
+{
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	err = __assign_irq_vector(irq, mask);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return err;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	struct irq_cfg *cfg;
+	cpumask_t mask;
+	int cpu, vector;
+
+	cfg = irq_cfg(irq);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	cpus_and(mask, cfg->domain, cpu_online_map);
+	for_each_cpu_mask_nr(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	cfg->vector = 0;
+	cpus_clear(cfg->domain);
+}
+
+void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	/* This function must be called with vector_lock held */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/* Mark the inuse vectors */
+	for_each_irq_cfg(cfg) {
+		if (!cpu_isset(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		irq = cfg->irq;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq < 0)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpu_isset(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = -1;
+	}
+}
+
+static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
+
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+        int apic, idx, pin;
+
+        for (apic = 0; apic < nr_ioapics; apic++) {
+                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                        idx = find_irq_entry(apic, pin, mp_INT);
+                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                                return irq_trigger(idx);
+                }
+        }
+        /*
+         * nonexistent IRQs are edge default
+         */
+        return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	return 1;
+}
+#endif
+
+static void ioapic_register_intr(int irq, unsigned long trigger)
+{
+	struct irq_desc *desc;
+
+	/* first time to use this irq_desc */
+	if (irq < 16)
+		desc = irq_to_desc(irq);
+	else
+		desc = irq_to_desc_alloc(irq);
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
+		desc->status |= IRQ_LEVEL;
+	else
+		desc->status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		desc->status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						     "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL)
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq,
+					      "fasteoi");
+	else
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
+{
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
+	}
+
+	entry->mask = 0;				/* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+	return 0;
+}
+
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+			      int trigger, int polarity)
+{
+	struct irq_cfg *cfg;
+	struct IO_APIC_route_entry entry;
+	cpumask_t mask;
+
+	if (!IO_APIC_IRQ(irq))
+		return;
+
+	cfg = irq_cfg(irq);
+
+	mask = TARGET_CPUS;
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(mask, cfg->domain, mask);
+
+	apic_printk(APIC_VERBOSE,KERN_DEBUG
+		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+		    "IRQ %d Mode:%i Active:%i)\n",
+		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+		    irq, trigger, polarity);
+
+
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
+
+	ioapic_register_intr(irq, trigger);
+	if (irq < 16)
+		disable_8259A_irq(irq);
+
+	ioapic_write_entry(apic, pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	int apic, pin, idx, irq, first_notcon = 1;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+		idx = find_irq_entry(apic,pin,mp_INT);
+		if (idx == -1) {
+			if (first_notcon) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
+				first_notcon = 0;
+			} else
+				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
+			continue;
+		}
+		if (!first_notcon) {
+			apic_printk(APIC_VERBOSE, " not connected.\n");
+			first_notcon = 1;
+		}
+
+		irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+                if (multi_timer_check(apic, irq))
+                        continue;
+#endif
+		add_pin_to_irq(irq, apic, pin);
+
+		setup_IO_APIC_irq(apic, pin, irq,
+				  irq_trigger(idx), irq_polarity(idx));
+	}
+	}
+
+	if (!first_notcon)
+		apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+					int vector)
+{
+	struct IO_APIC_route_entry entry;
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled)
+		return;
+#endif
+
+	memset(&entry, 0, sizeof(entry));
+
+	/*
+	 * We use logical delivery to get the timer IRQ
+	 * to the first CPU.
+	 */
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 1;					/* mask IRQ now */
+	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.polarity = 0;
+	entry.trigger = 0;
+	entry.vector = vector;
+
+	/*
+	 * The timer IRQ doesn't have to know that behind the
+	 * scene we may have a 8259A-master in AEOI mode ...
+	 */
+	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+	/*
+	 * Add it to the IO-APIC irq-routing table:
+	 */
+	ioapic_write_entry(apic, pin, entry);
+}
+
+
+__apicdebuginit(void) print_IO_APIC(void)
+{
+	int apic, i;
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
+	unsigned long flags;
+	struct irq_cfg *cfg;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (i = 0; i < nr_ioapics; i++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(apic, 0);
+	reg_01.raw = io_apic_read(apic, 1);
+	if (reg_01.bits.version >= 0x10)
+		reg_02.raw = io_apic_read(apic, 2);
+        if (reg_01.bits.version >= 0x20)
+                reg_03.raw = io_apic_read(apic, 3);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	printk("\n");
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+
+	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
+
+	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+	}
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+	}
+
+	printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+			  " Stat Dmod Deli Vect:   \n");
+
+	for (i = 0; i <= reg_01.bits.entries; i++) {
+		struct IO_APIC_route_entry entry;
+
+		entry = ioapic_read_entry(apic, i);
+
+		printk(KERN_DEBUG " %02x %03X ",
+			i,
+			entry.dest
+		);
+
+		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+			entry.mask,
+			entry.trigger,
+			entry.irr,
+			entry.polarity,
+			entry.delivery_status,
+			entry.dest_mode,
+			entry.delivery_mode,
+			entry.vector
+		);
+	}
+	}
+	printk(KERN_DEBUG "IRQ to pin mappings:\n");
+	for_each_irq_cfg(cfg) {
+		struct irq_pin_list *entry = cfg->irq_2_pin;
+		if (!entry)
+			continue;
+		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
+		for (;;) {
+			printk("-> %d:%d", entry->apic, entry->pin);
+			if (!entry->next)
+				break;
+			entry = entry->next;
+		}
+		printk("\n");
+	}
+
+	printk(KERN_INFO ".................................... done.\n");
+
+	return;
+}
+
+__apicdebuginit(void) print_APIC_bitfield(int base)
+{
+	unsigned int v;
+	int i, j;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+	for (i = 0; i < 8; i++) {
+		v = apic_read(base + i*0x10);
+		for (j = 0; j < 32; j++) {
+			if (v & (1<<j))
+				printk("1");
+			else
+				printk("0");
+		}
+		printk("\n");
+	}
+}
+
+__apicdebuginit(void) print_local_APIC(void *dummy)
+{
+	unsigned int v, ver, maxlvt;
+	u64 icr;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+		smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+		v = apic_read(APIC_ARBPRI);
+		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			v & APIC_ARBPRI_MASK);
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
+
+	v = apic_read(APIC_EOI);
+	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+	v = apic_read(APIC_RRR);
+	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	v = apic_read(APIC_LDR);
+	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+	v = apic_read(APIC_DFR);
+	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	v = apic_read(APIC_SPIV);
+	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+	printk(KERN_DEBUG "... APIC ISR field:\n");
+	print_APIC_bitfield(APIC_ISR);
+	printk(KERN_DEBUG "... APIC TMR field:\n");
+	print_APIC_bitfield(APIC_TMR);
+	printk(KERN_DEBUG "... APIC IRR field:\n");
+	print_APIC_bitfield(APIC_IRR);
+
+	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
+
+	icr = apic_icr_read();
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+
+	v = apic_read(APIC_LVTT);
+	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {                       /* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {			/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+	printk("\n");
+}
+
+__apicdebuginit(void) print_all_local_APICs(void)
+{
+	on_each_cpu(print_local_APIC, NULL, 1);
+}
+
+__apicdebuginit(void) print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+	outb(0x0b,0xa0);
+	outb(0x0b,0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a,0xa0);
+	outb(0x0a,0x20);
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+__apicdebuginit(int) print_all_ICs(void)
+{
+	print_PIC();
+	print_all_local_APICs();
+	print_IO_APIC();
+
+	return 0;
+}
+
+fs_initcall(print_all_ICs);
+
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+void __init enable_IO_APIC(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	int i8259_apic, i8259_pin;
+	int apic;
+	unsigned long flags;
+
+#ifdef CONFIG_X86_32
+	int i;
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+#endif
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+	for(apic = 0; apic < nr_ioapics; apic++) {
+		int pin;
+		/* See if any of the pins is in ExtINT mode */
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+			entry = ioapic_read_entry(apic, pin);
+
+			/* If the interrupt line is enabled and in ExtInt mode
+			 * I have found the pin where the i8259 is connected.
+			 */
+			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+				ioapic_i8259.apic = apic;
+				ioapic_i8259.pin  = pin;
+				goto found_i8259;
+			}
+		}
+	}
+ found_i8259:
+	/* Look to see what if the MP table has reported the ExtINT */
+	/* If we could not find the appropriate pin by looking at the ioapic
+	 * the i8259 probably is not connected the ioapic but give the
+	 * mptable a chance anyway.
+	 */
+	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+	/* Trust the MP table if nothing is setup in the hardware */
+	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+		ioapic_i8259.pin  = i8259_pin;
+		ioapic_i8259.apic = i8259_apic;
+	}
+	/* Complain if the MP table and the hardware disagree */
+	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+	{
+		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+	}
+
+	/*
+	 * Do not trust the IO-APIC being empty at bootup
+	 */
+	clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+	/*
+	 * Clear the IO-APIC before rebooting:
+	 */
+	clear_IO_APIC();
+
+	/*
+	 * If the i8259 is routed through an IOAPIC
+	 * Put that IOAPIC in virtual wire mode
+	 * so legacy interrupts can be delivered.
+	 */
+	if (ioapic_i8259.pin != -1) {
+		struct IO_APIC_route_entry entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.mask            = 0; /* Enabled */
+		entry.trigger         = 0; /* Edge */
+		entry.irr             = 0;
+		entry.polarity        = 0; /* High */
+		entry.delivery_status = 0;
+		entry.dest_mode       = 0; /* Physical */
+		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+		entry.vector          = 0;
+		entry.dest            = read_apic_id();
+
+		/*
+		 * Add it to the IO-APIC irq-routing table:
+		 */
+		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+	}
+
+	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int apic;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+		return;
+
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+		/* Read the register 0 value */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		old_id = mp_ioapics[apic].mp_apicid;
+
+		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+		}
+
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (check_apicid_used(phys_id_present_map,
+					mp_ioapics[apic].mp_apicid)) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				apic, mp_ioapics[apic].mp_apicid);
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			mp_ioapics[apic].mp_apicid = i;
+		} else {
+			physid_mask_t tmp;
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mp_ioapics[apic].mp_apicid);
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mp_ioapics[apic].mp_apicid)
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].mp_dstapic == old_id)
+					mp_irqs[i].mp_dstapic
+						= mp_ioapics[apic].mp_apicid;
+
+		/*
+		 * Read the right value from the MPC table and
+		 * write it into the ID register.
+		 */
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mp_ioapics[apic].mp_apicid);
+
+		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+		spin_lock_irqsave(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(apic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+			printk("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *	- timer IRQ defaults to IO-APIC IRQ
+ *	- if this function detects that timer IRQs are defunct, then we fall
+ *	  back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+	unsigned long t1 = jiffies;
+	unsigned long flags;
+
+	if (no_timer_check)
+		return 1;
+
+	local_save_flags(flags);
+	local_irq_enable();
+	/* Let ten ticks pass... */
+	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
+
+	/*
+	 * Expect a few ticks at least, to be sure some possible
+	 * glue logic does not lock up after one or two first
+	 * ticks in a non-ExtINT mode.  Also the local APIC
+	 * might have cached one ExtINT interrupt.  Finally, at
+	 * least one tick may be lost due to delays.
+	 */
+
+	/* jiffies wrap? */
+	if (time_after(jiffies, t1 + 4))
+		return 1;
+	return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_ioapic_irq(unsigned int irq)
+{
+	int was_pending = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	if (irq < 16) {
+		disable_8259A_irq(irq);
+		if (i8259A_irq_pending(irq))
+			was_pending = 1;
+	}
+	__unmask_IO_APIC_irq(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return was_pending;
+}
+
+#ifdef CONFIG_X86_64
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+        send_IPI_self(irq_cfg(irq)->vector);
+
+        return 1;
+}
+#endif
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte;
+	unsigned int dest;
+	unsigned long flags;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	desc = irq_to_desc(irq);
+	modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc->affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+	 	 * Interrupt in progress. Migrating irq now will change the
+		 * vector information in the IO-APIC RTE and that will confuse
+		 * the EOI broadcast performed by cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everthing is clear. we have right of way */
+	migrate_ioapic_irq(irq, desc->pending_mask);
+
+	ret = 0;
+	desc->status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(desc->pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	unsigned int irq;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(irq, desc) {
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq, desc->pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (desc->status & IRQ_LEVEL) {
+		desc->status |= IRQ_MOVE_PENDING;
+		desc->pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_ax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
+
+static void ack_apic_edge(unsigned int irq)
+{
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	ack_APIC_irq();
+}
+
+#ifdef CONFIG_X86_64
+static void ack_apic_level(unsigned int irq)
+{
+	int do_unmask_irq = 0;
+
+	irq_complete_move(irq);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	/* If we are moving the irq we need to mask it */
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+		do_unmask_irq = 1;
+		mask_IO_APIC_irq(irq);
+	}
+#endif
+
+	/*
+	 * We must acknowledge the irq before we move it or the acknowledge will
+	 * not propagate properly.
+	 */
+	ack_APIC_irq();
+
+	/* Now we can move and renable the irq */
+	if (unlikely(do_unmask_irq)) {
+		/* Only migrate the irq if the ack has been received.
+		 *
+		 * On rare occasions the broadcast level triggered ack gets
+		 * delayed going to ioapics, and if we reprogram the
+		 * vector while Remote IRR is still set the irq will never
+		 * fire again.
+		 *
+		 * To prevent this scenario we read the Remote IRR bit
+		 * of the ioapic.  This has two effects.
+		 * - On any sane system the read of the ioapic will
+		 *   flush writes (and acks) going to the ioapic from
+		 *   this cpu.
+		 * - We get to see if the ACK has actually been delivered.
+		 *
+		 * Based on failed experiments of reprogramming the
+		 * ioapic entry from outside of irq context starting
+		 * with masking the ioapic entry and then polling until
+		 * Remote IRR was clear before reprogramming the
+		 * ioapic I don't trust the Remote IRR bit to be
+		 * completey accurate.
+		 *
+		 * However there appears to be no other way to plug
+		 * this race, so if the Remote IRR bit is not
+		 * accurate and is causing problems then it is a hardware bug
+		 * and you can go talk to the chipset vendor about it.
+		 */
+		if (!io_apic_level_ack_pending(irq))
+			move_masked_irq(irq);
+		unmask_IO_APIC_irq(irq);
+	}
+}
+#else
+atomic_t irq_mis_count;
+static void ack_apic_level(unsigned int irq)
+{
+	unsigned long v;
+	int i;
+
+	irq_complete_move(irq);
+	move_native_irq(irq);
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	ack_APIC_irq();
+
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+		spin_lock(&ioapic_lock);
+		__mask_and_edge_IO_APIC_irq(irq);
+		__unmask_and_level_IO_APIC_irq(irq);
+		spin_unlock(&ioapic_lock);
+	}
+}
+#endif
+
+static struct irq_chip ioapic_chip __read_mostly = {
+	.name 		= "IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_apic_edge,
+	.eoi 		= ack_apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name 		= "IR-IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_x2apic_edge,
+	.eoi 		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
+
+static inline void init_IO_APIC_traps(void)
+{
+	int irq;
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	for_each_irq_cfg(cfg) {
+		irq = cfg->irq;
+		if (IO_APIC_IRQ(irq) && !cfg->vector) {
+			/*
+			 * Hmm.. We don't have an entry for this,
+			 * so default to an old-fashioned 8259
+			 * interrupt if we can..
+			 */
+			if (irq < 16)
+				make_8259A_irq(irq);
+			else {
+				desc = irq_to_desc(irq);
+				/* Strange. Oh, well.. */
+				desc->chip = &no_irq_chip;
+			}
+		}
+	}
+}
+
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void unmask_lapic_irq(unsigned int irq)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
+static struct irq_chip lapic_chip __read_mostly = {
+	.name		= "local-APIC",
+	.mask		= mask_lapic_irq,
+	.unmask		= unmask_lapic_irq,
+	.ack		= ack_lapic_irq,
+};
+
+static void lapic_register_intr(int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->status &= ~IRQ_LEVEL;
+	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+				      "edge");
+}
+
+static void __init setup_nmi(void)
+{
+	/*
+	 * Dirty trick to enable the NMI watchdog ...
+	 * We put the 8259A master into AEOI mode and
+	 * unmask on all local APICs LVT0 as NMI.
+	 *
+	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+	 * is from Maciej W. Rozycki - so we do not have to EOI from
+	 * the NMI handler or the timer interrupt.
+	 */
+	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+	enable_NMI_through_LVT0();
+
+	apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void __init unlock_ExtINT_logic(void)
+{
+	int apic, pin, i;
+	struct IO_APIC_route_entry entry0, entry1;
+	unsigned char save_control, save_freq_select;
+
+	pin  = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	apic = find_isa_irq_apic(8, mp_INT);
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	entry0 = ioapic_read_entry(apic, pin);
+	clear_IO_APIC_pin(apic, pin);
+
+	memset(&entry1, 0, sizeof(entry1));
+
+	entry1.dest_mode = 0;			/* physical delivery */
+	entry1.mask = 0;			/* unmask IRQ now */
+	entry1.dest = hard_smp_processor_id();
+	entry1.delivery_mode = dest_ExtINT;
+	entry1.polarity = entry0.polarity;
+	entry1.trigger = 0;
+	entry1.vector = 0;
+
+	ioapic_write_entry(apic, pin, entry1);
+
+	save_control = CMOS_READ(RTC_CONTROL);
+	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+		   RTC_FREQ_SELECT);
+	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+	i = 100;
+	while (i-- > 0) {
+		mdelay(10);
+		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+			i -= 10;
+	}
+
+	CMOS_WRITE(save_control, RTC_CONTROL);
+	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+	clear_IO_APIC_pin(apic, pin);
+
+	ioapic_write_entry(apic, pin, entry0);
+}
+
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
+ */
+static inline void __init check_timer(void)
+{
+	struct irq_cfg *cfg = irq_cfg(0);
+	int apic1, pin1, apic2, pin2;
+	unsigned long flags;
+	unsigned int ver;
+	int no_pin1 = 0;
+
+	local_irq_save(flags);
+
+        ver = apic_read(APIC_LVR);
+        ver = GET_APIC_VERSION(ver);
+
+	/*
+	 * get/set the timer IRQ vector:
+	 */
+	disable_8259A_irq(0);
+	assign_irq_vector(0, TARGET_CPUS);
+
+	/*
+	 * As IRQ0 is to be enabled in the 8259A, the virtual
+	 * wire has to be disabled in the local APIC.  Also
+	 * timer interrupts need to be acknowledged manually in
+	 * the 8259A for the i82489DX when using the NMI
+	 * watchdog as that APIC treats NMIs as level-triggered.
+	 * The AEOI mode will finish them in the 8259A
+	 * automatically.
+	 */
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+	init_8259A(1);
+#ifdef CONFIG_X86_32
+	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
+
+	pin1  = find_isa_irq_pin(0, mp_INT);
+	apic1 = find_isa_irq_apic(0, mp_INT);
+	pin2  = ioapic_i8259.pin;
+	apic2 = ioapic_i8259.apic;
+
+	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+		    cfg->vector, apic1, pin1, apic2, pin2);
+
+	/*
+	 * Some BIOS writers are clueless and report the ExtINTA
+	 * I/O APIC input from the cascaded 8259A as the timer
+	 * interrupt input.  So just in case, if only one pin
+	 * was found above, try it both directly and through the
+	 * 8259A.
+	 */
+	if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
+#endif
+		pin1 = pin2;
+		apic1 = apic2;
+		no_pin1 = 1;
+	} else if (pin2 == -1) {
+		pin2 = pin1;
+		apic2 = apic1;
+	}
+
+	if (pin1 != -1) {
+		/*
+		 * Ok, does IRQ0 through the IOAPIC work?
+		 */
+		if (no_pin1) {
+			add_pin_to_irq(0, apic1, pin1);
+			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+		}
+		unmask_IO_APIC_irq(0);
+		if (timer_irq_works()) {
+			if (nmi_watchdog == NMI_IO_APIC) {
+				setup_nmi();
+				enable_8259A_irq(0);
+			}
+			if (disable_timer_pin_1 > 0)
+				clear_IO_APIC_pin(0, pin1);
+			goto out;
+		}
+#ifdef CONFIG_INTR_REMAP
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
+		clear_IO_APIC_pin(apic1, pin1);
+		if (!no_pin1)
+			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+				    "8254 timer not connected to IO-APIC\n");
+
+		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+			    "(IRQ0) through the 8259A ...\n");
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
+		/*
+		 * legacy devices should be connected to IO APIC #0
+		 */
+		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+		unmask_IO_APIC_irq(0);
+		enable_8259A_irq(0);
+		if (timer_irq_works()) {
+			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+			timer_through_8259 = 1;
+			if (nmi_watchdog == NMI_IO_APIC) {
+				disable_8259A_irq(0);
+				setup_nmi();
+				enable_8259A_irq(0);
+			}
+			goto out;
+		}
+		/*
+		 * Cleanup, just in case ...
+		 */
+		disable_8259A_irq(0);
+		clear_IO_APIC_pin(apic2, pin2);
+		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+	}
+
+	if (nmi_watchdog == NMI_IO_APIC) {
+		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
+			    "through the IO-APIC - disabling NMI Watchdog!\n");
+		nmi_watchdog = NMI_NONE;
+	}
+#ifdef CONFIG_X86_32
+	timer_ack = 0;
+#endif
+
+	apic_printk(APIC_QUIET, KERN_INFO
+		    "...trying to set up timer as Virtual Wire IRQ...\n");
+
+	lapic_register_intr(0);
+	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
+	enable_8259A_irq(0);
+
+	if (timer_irq_works()) {
+		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		goto out;
+	}
+	disable_8259A_irq(0);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
+	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+
+	apic_printk(APIC_QUIET, KERN_INFO
+		    "...trying to set up timer as ExtINT IRQ...\n");
+
+	init_8259A(0);
+	make_8259A_irq(0);
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+
+	unlock_ExtINT_logic();
+
+	if (timer_irq_works()) {
+		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		goto out;
+	}
+	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+		"report.  Then try booting with the 'noapic' option.\n");
+out:
+	local_irq_restore(flags);
+}
+
+/*
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
+ */
+#define PIC_IRQS	(1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+
+#ifdef CONFIG_X86_32
+	enable_IO_APIC();
+#else
+	/*
+	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+	 */
+#endif
+
+	io_apic_irqs = ~PIC_IRQS;
+
+	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+        /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+        if (!acpi_ioapic)
+                setup_ioapic_ids_from_mpc();
+#endif
+	sync_Arb_IDs();
+	setup_IO_APIC_irqs();
+	init_IO_APIC_traps();
+	check_timer();
+}
+
+/*
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+        if (sis_apic_bug == -1)
+                sis_apic_bug = 0;
+        return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+	struct sys_device dev;
+	struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+	struct IO_APIC_route_entry *entry;
+	struct sysfs_ioapic_data *data;
+	int i;
+
+	data = container_of(dev, struct sysfs_ioapic_data, dev);
+	entry = data->entry;
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
+		*entry = ioapic_read_entry(dev->id, i);
+
+	return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+	struct IO_APIC_route_entry *entry;
+	struct sysfs_ioapic_data *data;
+	unsigned long flags;
+	union IO_APIC_reg_00 reg_00;
+	int i;
+
+	data = container_of(dev, struct sysfs_ioapic_data, dev);
+	entry = data->entry;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(dev->id, 0);
+	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+		io_apic_write(dev->id, 0, reg_00.raw);
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
+		ioapic_write_entry(dev->id, i, entry[i]);
+
+	return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+	.name = "ioapic",
+	.suspend = ioapic_suspend,
+	.resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+	struct sys_device * dev;
+	int i, size, error;
+
+	error = sysdev_class_register(&ioapic_sysdev_class);
+	if (error)
+		return error;
+
+	for (i = 0; i < nr_ioapics; i++ ) {
+		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+			* sizeof(struct IO_APIC_route_entry);
+		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
+		if (!mp_ioapic_data[i]) {
+			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+			continue;
+		}
+		dev = &mp_ioapic_data[i]->dev;
+		dev->id = i;
+		dev->cls = &ioapic_sysdev_class;
+		error = sysdev_register(dev);
+		if (error) {
+			kfree(mp_ioapic_data[i]);
+			mp_ioapic_data[i] = NULL;
+			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+			continue;
+		}
+	}
+
+	return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/*
+ * Dynamic irq allocate and deallocation
+ */
+unsigned int create_irq_nr(unsigned int irq_want)
+{
+	/* Allocate an unused irq */
+	unsigned int irq;
+	unsigned int new;
+	unsigned long flags;
+	struct irq_cfg *cfg_new;
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+	irq_want = nr_irqs - 1;
+#endif
+
+	irq = 0;
+	spin_lock_irqsave(&vector_lock, flags);
+	for (new = irq_want; new > 0; new--) {
+		if (platform_legacy_irq(new))
+			continue;
+		cfg_new = irq_cfg(new);
+		if (cfg_new && cfg_new->vector != 0)
+			continue;
+		/* check if need to create one */
+		if (!cfg_new)
+			cfg_new = irq_cfg_alloc(new);
+		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+			irq = new;
+		break;
+	}
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (irq > 0) {
+		dynamic_irq_init(irq);
+	}
+	return irq;
+}
+
+int create_irq(void)
+{
+	int irq;
+
+	irq = create_irq_nr(nr_irqs - 1);
+
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	dynamic_irq_cleanup(irq);
+
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+	cpumask_t tmp;
+
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if (err)
+		return err;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset (&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->address_lo =
+			MSI_ADDR_BASE_LO |
+			((INT_DEST_MODE == 0) ?
+				MSI_ADDR_DEST_MODE_PHYSICAL:
+				MSI_ADDR_DEST_MODE_LOGICAL) |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_ADDR_REDIRECTION_CPU:
+				MSI_ADDR_REDIRECTION_LOWPRI) |
+			MSI_ADDR_DEST_ID(dest);
+
+		msg->data =
+			MSI_DATA_TRIGGER_EDGE |
+			MSI_DATA_LEVEL_ASSERT |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				MSI_DATA_DELIVERY_FIXED:
+				MSI_DATA_DELIVERY_LOWPRI) |
+			MSI_DATA_VECTOR(cfg->vector);
+	}
+	return err;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	read_msi_msg(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	write_msi_msg(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name		= "PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		        pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+	unsigned int irq;
+
+	irq = dev->bus->number;
+	irq <<= 8;
+	irq |= dev->devfn;
+	irq <<= 12;
+
+	return irq;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+	unsigned int irq;
+	int ret;
+	unsigned int irq_want;
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+	irq = create_irq_nr(irq_want);
+	if (irq == 0)
+		return -1;
+
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
+	ret = setup_msi_irq(dev, desc, irq);
+	if (ret < 0) {
+		destroy_irq(irq);
+		return ret;
+	}
+	return 0;
+
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
+}
+
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	unsigned int irq;
+	int ret, sub_handle;
+	struct msi_desc *desc;
+	unsigned int irq_want;
+
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
+
+	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want--);
+		if (irq == 0)
+			return -1;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
+	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	destroy_irq(irq);
+}
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int dest;
+	cpumask_t tmp;
+	struct irq_desc *desc;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	target_ht_irq(irq, dest, cfg->vector);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif
+
+static struct irq_chip ht_irq_chip = {
+	.name		= "PCI-HT",
+	.mask		= mask_ht_irq,
+	.unmask		= unmask_ht_irq,
+	.ack		= ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= set_ht_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	int err;
+	cpumask_t tmp;
+
+	tmp = TARGET_CPUS;
+	err = assign_irq_vector(irq, tmp);
+	if (!err) {
+		struct ht_irq_msg msg;
+		unsigned dest;
+
+		cfg = irq_cfg(irq);
+		cpus_and(tmp, cfg->domain, tmp);
+		dest = cpu_mask_to_apicid(tmp);
+
+		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+		msg.address_lo =
+			HT_IRQ_LOW_BASE |
+			HT_IRQ_LOW_DEST_ID(dest) |
+			HT_IRQ_LOW_VECTOR(cfg->vector) |
+			((INT_DEST_MODE == 0) ?
+				HT_IRQ_LOW_DM_PHYSICAL :
+				HT_IRQ_LOW_DM_LOGICAL) |
+			HT_IRQ_LOW_RQEOI_EDGE |
+			((INT_DELIVERY_MODE != dest_LowestPrio) ?
+				HT_IRQ_LOW_MT_FIXED :
+				HT_IRQ_LOW_MT_ARBITRATED) |
+			HT_IRQ_LOW_IRQ_MASKED;
+
+		write_ht_irq_msg(irq, &msg);
+
+		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+					      handle_edge_irq, "edge");
+	}
+	return err;
+}
+#endif /* CONFIG_HT_IRQ */
+
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI
+
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
+
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
+	 * supports up to 16 on one shared APIC bus.
+	 *
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (check_apicid_used(apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!check_apicid_used(apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	}
+
+	tmp = apicid_to_cpu_present(apic_id);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id) {
+			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			return -1;
+		}
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.version;
+}
+#endif
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
+{
+	if (!IO_APIC_IRQ(irq)) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= 16)
+		add_pin_to_irq(irq, ioapic, pin);
+
+	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+
+	return 0;
+}
+
+
+int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+{
+	int i;
+
+	if (skip_ioapic_setup)
+		return -1;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mp_irqtype == mp_INT &&
+		    mp_irqs[i].mp_srcbusirq == bus_irq)
+			break;
+	if (i >= mp_irq_entries)
+		return -1;
+
+	*trigger = irq_trigger(i);
+	*polarity = irq_polarity(i);
+	return 0;
+}
+
+#endif /* CONFIG_ACPI */
+
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+	struct irq_cfg *cfg;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+			if (irq_entry == -1)
+				continue;
+			irq = pin_2_irq(irq_entry, ioapic, pin);
+
+			/* setup_IO_APIC_irqs could fail to get vector for some device
+			 * when you have too many devices, because at that time only boot
+			 * cpu is online.
+			 */
+			cfg = irq_cfg(irq);
+			if (!cfg->vector)
+				setup_IO_APIC_irq(ioapic, pin, irq,
+						  irq_trigger(irq_entry),
+						  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+			else if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+			else
+				set_ioapic_affinity_irq(irq, TARGET_CPUS);
+		}
+
+	}
+}
+#endif
+
+#ifdef CONFIG_X86_64
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+#endif
+
+void __init ioapic_init_mappings(void)
+{
+	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+	int i;
+#ifdef CONFIG_X86_64
+	struct resource *ioapic_res;
+
+	ioapic_res = ioapic_setup_resources();
+#endif
+	for (i = 0; i < nr_ioapics; i++) {
+		if (smp_found_config) {
+			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+                        if (!ioapic_phys) {
+                                printk(KERN_ERR
+                                       "WARNING: bogus zero IO-APIC "
+                                       "address found in MPTABLE, "
+                                       "disabling IO/APIC support!\n");
+                                smp_found_config = 0;
+                                skip_ioapic_setup = 1;
+                                goto fake_ioapic_page;
+                        }
+#endif
+		} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
+			ioapic_phys = (unsigned long)
+				alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = __pa(ioapic_phys);
+		}
+		set_fixmap_nocache(idx, ioapic_phys);
+		apic_printk(APIC_VERBOSE,
+			    "mapped IOAPIC to %08lx (%08lx)\n",
+			    __fix_to_virt(idx), ioapic_phys);
+		idx++;
+
+#ifdef CONFIG_X86_64
+		if (ioapic_res != NULL) {
+			ioapic_res->start = ioapic_phys;
+			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+			ioapic_res++;
+		}
+#endif
+	}
+}
+
+#ifdef CONFIG_X86_64
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk(KERN_ERR
+		       "IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index fba6d6ee3480..000000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,3913 +0,0 @@
-/*
- *	Intel IO-APIC support for multi-Pentium hosts.
- *
- *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- *	Many thanks to Stig Venaas for trying out countless experimental
- *	patches and reporting/debugging problems patiently!
- *
- *	(c) 1999, Multiple IO-APIC support, developed by
- *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *	further tested and cleaned up by Zach Brown <zab@redhat.com>
- *	and Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively
- *	Paul Diefenbaugh	:	Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/dmar.h>
-
-#include <asm/idle.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-#include <asm/irq_remapping.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-
-#define __apicdebuginit(type) static type __init
-
-/*
- *      Is the SiS APIC rmw bug present ?
- *      -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-int skip_ioapic_setup;
-
-static int __init parse_noapic(char *str)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-struct irq_cfg;
-struct irq_pin_list;
-struct irq_cfg {
-	unsigned int irq;
-	struct irq_cfg *next;
-	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-static struct irq_cfg *irq_cfgx_free;
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_cfg *cfg;
-	int legacy_count;
-	int i;
-
-	cfg = *da->name;
-
-	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-	for (i = legacy_count; i < *da->nr; i++)
-		init_one_irq_cfg(&cfg[i]);
-
-	for (i = 1; i < *da->nr; i++)
-		cfg[i-1].next = &cfg[i];
-
-	irq_cfgx_free = &irq_cfgx[legacy_count];
-	irq_cfgx[legacy_count - 1].next = NULL;
-}
-
-#define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg = cfg->next;
-	}
-
-	return NULL;
-}
-
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	struct irq_cfg *cfg, *cfg_pri;
-	int i;
-	int count = 0;
-
-	cfg_pri = cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg_pri = cfg;
-		cfg = cfg->next;
-		count++;
-	}
-
-	if (!irq_cfgx_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-		if (after_bootmem)
-			cfg = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!cfg)
-			panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_cfg; i++)
-			init_one_irq_cfg(&cfg[i]);
-
-		for (i = 1; i < nr_irq_cfg; i++)
-			cfg[i-1].next = &cfg[i];
-
-		irq_cfgx_free = cfg;
-	}
-
-	cfg = irq_cfgx_free;
-	irq_cfgx_free = irq_cfgx_free->next;
-	cfg->next = NULL;
-	if (cfg_pri)
-		cfg_pri->next = cfg;
-	else
-		irq_cfgx = cfg;
-	cfg->irq = irq;
-	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_cfg *cfg;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_cfg);
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-		for_each_irq_cfg(cfg) {
-			phys = __pa(cfg);
-			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
-	return cfg;
-}
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
-static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = *da->name;
-
-	for (i = 1; i < *da->nr; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = &pin[0];
-}
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
-
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
-{
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = irq_2_pin_ptr;
-
-	if (pin) {
-		irq_2_pin_ptr = pin->next;
-		pin->next = NULL;
-		return pin;
-	}
-
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-	if (after_bootmem)
-		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-				 GFP_ATOMIC);
-	else
-		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-				nr_irq_2_pin, PAGE_SIZE, 0);
-
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
-
-	for (i = 1; i < nr_irq_2_pin; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-
-	return pin;
-}
-
-struct io_apic {
-	unsigned int index;
-	unsigned int unused[3];
-	unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-#ifdef CONFIG_X86_64
-static bool io_apic_level_ack_pending(unsigned int irq)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-		int pin;
-
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		/* Is the remote IRR bit set? */
-		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-			spin_unlock_irqrestore(&ioapic_lock, flags);
-			return true;
-		}
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return false;
-}
-#endif
-
-union entry_union {
-	struct { u32 w1, w2; };
-	struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-	union entry_union eu;
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	union entry_union eu;
-	eu.entry = e;
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__ioapic_write_entry(apic, pin, e);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-	unsigned long flags;
-	union entry_union eu = { .entry.mask = 1 };
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	int apic, pin;
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-#ifdef CONFIG_INTR_REMAP
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-#else
-		io_apic_write(apic, 0x11 + pin*2, dest);
-#endif
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
-
-	desc = irq_to_desc(irq);
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif /* CONFIG_SMP */
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
-	entry = cfg->irq_2_pin;
-	if (!entry) {
-		entry = get_one_free_irq_2_pin();
-		cfg->irq_2_pin = entry;
-		entry->apic = apic;
-		entry->pin = pin;
-		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-		return;
-	}
-
-	while (entry->next) {
-		/* not again, please */
-		if (entry->apic == apic && entry->pin == pin)
-			return;
-
-		entry = entry->next;
-	}
-
-	entry->next = get_one_free_irq_2_pin();
-	entry = entry->next;
-	entry->apic = apic;
-	entry->pin = pin;
-	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct irq_pin_list *entry = cfg->irq_2_pin;
-	int replaced = 0;
-
-	while (entry) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-			replaced = 1;
-			/* every one is different, right? */
-			break;
-		}
-		entry = entry->next;
-	}
-
-	/* why? call replace before add? */
-	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
-#define DO_ACTION(name,R,ACTION, FINAL)					\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
-
-/* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
-
-#else
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-	unsigned int pin, reg;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
-
-#endif
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-	struct IO_APIC_route_entry entry;
-
-	/* Check delivery_mode to be sure we're not clearing an SMI pin */
-	entry = ioapic_read_entry(apic, pin);
-	if (entry.delivery_mode == dest_SMI)
-		return;
-	/*
-	 * Disable it in the IO-APIC irq-routing table:
-	 */
-	ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC (void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			clear_IO_APIC_pin(apic, pin);
-}
-
-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
-void send_IPI_self(int vector)
-{
-	unsigned int cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP && CONFIG_X86_32*/
-
-#ifdef CONFIG_X86_32
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-
-static int __init ioapic_pirq_setup(char *str)
-{
-	int i, max;
-	int ints[MAX_PIRQS+1];
-
-	get_options(str, ARRAY_SIZE(ints), ints);
-
-	for (i = 0; i < MAX_PIRQS; i++)
-		pirq_entries[i] = -1;
-
-	pirqs_enabled = 1;
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"PIRQ redirection, working around broken MP-BIOS.\n");
-	max = MAX_PIRQS;
-	if (ints[0] < MAX_PIRQS)
-		max = ints[0];
-
-	for (i = 0; i < max; i++) {
-		apic_printk(APIC_VERBOSE, KERN_DEBUG
-				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-		/*
-		 * PIRQs are mapped upside down, usually.
-		 */
-		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-	}
-	return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/*
- * Saves and masks all the unmasked IO-APIC RTE's
- */
-int save_mask_IO_APIC_setup(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	unsigned long flags;
-	int apic, pin;
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		early_ioapic_entries[apic] =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				nr_ioapic_registers[apic], GFP_KERNEL);
-		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-
-			entry = early_ioapic_entries[apic][pin] =
-				ioapic_read_entry(apic, pin);
-			if (!entry.mask) {
-				entry.mask = 1;
-				ioapic_write_entry(apic, pin, entry);
-			}
-		}
-	return 0;
-}
-
-void restore_IO_APIC_setup(void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			ioapic_write_entry(apic, pin,
-					   early_ioapic_entries[apic][pin]);
-}
-
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup();
-}
-#endif
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mp_dstirq == pin)
-			return i;
-
-	return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-
-			return mp_irqs[i].mp_dstirq;
-	}
-	return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-			break;
-	}
-	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
-				return apic;
-		}
-	}
-
-	return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mp_irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-	if (irq < 16) {
-		unsigned int port = 0x4d0 + (irq >> 3);
-		return (inb(port) >> (irq & 7)) & 1;
-	}
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"Broken MPtable reports ISA irq %d\n", irq);
-	return 0;
-}
-
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)	(0)
-#define default_ISA_polarity(idx)	(0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)	(1)
-#define default_PCI_polarity(idx)	(1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int polarity;
-
-	/*
-	 * Determine IRQ line polarity (high active or low active):
-	 */
-	switch (mp_irqs[idx].mp_irqflag & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent polarity */
-			if (test_bit(bus, mp_bus_not_pci))
-				polarity = default_ISA_polarity(idx);
-			else
-				polarity = default_PCI_polarity(idx);
-			break;
-		case 1: /* high active */
-		{
-			polarity = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-		case 3: /* low active */
-		{
-			polarity = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-	}
-	return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int trigger;
-
-	/*
-	 * Determine IRQ trigger mode (edge or level sensitive):
-	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent */
-			if (test_bit(bus, mp_bus_not_pci))
-				trigger = default_ISA_trigger(idx);
-			else
-				trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-			switch (mp_bus_id_to_type[bus]) {
-				case MP_BUS_ISA: /* ISA pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_EISA: /* EISA pin */
-				{
-					trigger = default_EISA_trigger(idx);
-					break;
-				}
-				case MP_BUS_PCI: /* PCI pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_MCA: /* MCA pin */
-				{
-					trigger = default_MCA_trigger(idx);
-					break;
-				}
-				default:
-				{
-					printk(KERN_WARNING "broken BIOS!!\n");
-					trigger = 1;
-					break;
-				}
-			}
-#endif
-			break;
-		case 1: /* edge */
-		{
-			trigger = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
-			break;
-		}
-		case 3: /* level */
-		{
-			trigger = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 0;
-			break;
-		}
-	}
-	return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-	return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-	return MPBIOS_trigger(idx);
-}
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-static int pin_2_irq(int idx, int apic, int pin)
-{
-	int irq, i;
-	int bus = mp_irqs[idx].mp_srcbus;
-
-	/*
-	 * Debugging check, we are in big trouble if this message pops up!
-	 */
-	if (mp_irqs[idx].mp_dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-	if (test_bit(bus, mp_bus_not_pci)) {
-		irq = mp_irqs[idx].mp_srcbusirq;
-	} else {
-		/*
-		 * PCI IRQs are mapped in order
-		 */
-		i = irq = 0;
-		while (i < apic)
-			irq += nr_ioapic_registers[i++];
-		irq += pin;
-                /*
-                 * For MPS mode, so far only needed by ES7000 platform
-                 */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
-	}
-
-#ifdef CONFIG_X86_32
-	/*
-	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
-	 */
-	if ((pin >= 16) && (pin <= 23)) {
-		if (pirq_entries[pin-16] != -1) {
-			if (!pirq_entries[pin-16]) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"disabling PIRQ%d\n", pin-16);
-			} else {
-				irq = pirq_entries[pin-16];
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"using PIRQ%d -> IRQ %d\n",
-						pin-16, irq);
-			}
-		}
-	}
-#endif
-
-	return irq;
-}
-
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	spin_unlock(&vector_lock);
-}
-
-static int __assign_irq_vector(int irq, cpumask_t mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	unsigned int old_vector;
-	int cpu;
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfg(irq);
-
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
-	old_vector = cfg->vector;
-	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
-			return 0;
-	}
-
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
-		int new_cpu;
-		int vector, offset;
-
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 8;
-		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
-			offset = (offset + 1) % 8;
-			vector = FIRST_DEVICE_VECTOR + offset;
-		}
-		if (unlikely(current_vector == vector))
-			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
-			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
-			goto next;
-#endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-				goto next;
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (old_vector) {
-			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
-		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cfg->domain = domain;
-		return 0;
-	}
-	return -ENOSPC;
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask)
-{
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
-	spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq)
-{
-	struct irq_cfg *cfg;
-	cpumask_t mask;
-	int cpu, vector;
-
-	cfg = irq_cfg(irq);
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
-		per_cpu(vector_irq, cpu)[vector] = -1;
-
-	cfg->vector = 0;
-	cpus_clear(cfg->domain);
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	/* This function must be called with vector_lock held */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/* Mark the inuse vectors */
-	for_each_irq_cfg(cfg) {
-		if (!cpu_isset(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		irq = cfg->irq;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq < 0)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = -1;
-	}
-}
-
-static struct irq_chip ioapic_chip;
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip;
-#endif
-
-#define IOAPIC_AUTO     -1
-#define IOAPIC_EDGE     0
-#define IOAPIC_LEVEL    1
-
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
-        int apic, idx, pin;
-
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
-         * nonexistent IRQs are edge default
-         */
-        return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	return 1;
-}
-#endif
-
-static void ioapic_register_intr(int irq, unsigned long trigger)
-{
-	struct irq_desc *desc;
-
-	/* first time to use this irq_desc */
-	if (irq < 16)
-		desc = irq_to_desc(irq);
-	else
-		desc = irq_to_desc_alloc(irq);
-
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		desc->status |= IRQ_LEVEL;
-	else
-		desc->status &= ~IRQ_LEVEL;
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		desc->status |= IRQ_MOVE_PCNTXT;
-		if (trigger)
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_fasteoi_irq,
-						     "fasteoi");
-		else
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_edge_irq, "edge");
-		return;
-	}
-#endif
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq,
-					      "fasteoi");
-	else
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_edge_irq, "edge");
-}
-
-static int setup_ioapic_entry(int apic, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector)
-{
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
-
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic);
-
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic);
-
-		memset(&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = trigger;
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = vector;
-		irte.dest_id = IRTE_DEST(destination);
-
-		modify_irte(irq, &irte);
-
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-	} else
-#endif
-	{
-		entry->delivery_mode = INT_DELIVERY_MODE;
-		entry->dest_mode = INT_DEST_MODE;
-		entry->dest = destination;
-	}
-
-	entry->mask = 0;				/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
-	entry->vector = vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry->mask = 1;
-	return 0;
-}
-
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
-			      int trigger, int polarity)
-{
-	struct irq_cfg *cfg;
-	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
-
-	if (!IO_APIC_IRQ(irq))
-		return;
-
-	cfg = irq_cfg(irq);
-
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(mask, cfg->domain, mask);
-
-	apic_printk(APIC_VERBOSE,KERN_DEBUG
-		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
-		    "IRQ %d Mode:%i Active:%i)\n",
-		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
-		    irq, trigger, polarity);
-
-
-	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
-			       cfg->vector)) {
-		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
-		return;
-	}
-
-	ioapic_register_intr(irq, trigger);
-	if (irq < 16)
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(apic, pin, entry);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-	int apic, pin, idx, irq, first_notcon = 1;
-
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
-
-		irq = pin_2_irq(idx, apic, pin);
-#ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
-#endif
-		add_pin_to_irq(irq, apic, pin);
-
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
-	}
-
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
-					int vector)
-{
-	struct IO_APIC_route_entry entry;
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled)
-		return;
-#endif
-
-	memset(&entry, 0, sizeof(entry));
-
-	/*
-	 * We use logical delivery to get the timer IRQ
-	 * to the first CPU.
-	 */
-	entry.dest_mode = INT_DEST_MODE;
-	entry.mask = 1;					/* mask IRQ now */
-	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.polarity = 0;
-	entry.trigger = 0;
-	entry.vector = vector;
-
-	/*
-	 * The timer IRQ doesn't have to know that behind the
-	 * scene we may have a 8259A-master in AEOI mode ...
-	 */
-	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
-	/*
-	 * Add it to the IO-APIC irq-routing table:
-	 */
-	ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-	int apic, i;
-	union IO_APIC_reg_00 reg_00;
-	union IO_APIC_reg_01 reg_01;
-	union IO_APIC_reg_02 reg_02;
-	union IO_APIC_reg_03 reg_03;
-	unsigned long flags;
-	struct irq_cfg *cfg;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
-	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
-	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
-	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
-	 * but the value of reg_02 is read as the previous read register
-	 * value, so ignore it if reg_02 == reg_01.
-	 */
-	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-	}
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
-	 * or reg_03, but the value of reg_0[23] is read as the previous read
-	 * register value, so ignore it if reg_03 == reg_0[12].
-	 */
-	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-	    reg_03.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-	}
-
-	printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
-			  " Stat Dmod Deli Vect:   \n");
-
-	for (i = 0; i <= reg_01.bits.entries; i++) {
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		printk(KERN_DEBUG " %02x %03X ",
-			i,
-			entry.dest
-		);
-
-		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-			entry.mask,
-			entry.trigger,
-			entry.irr,
-			entry.polarity,
-			entry.delivery_status,
-			entry.dest_mode,
-			entry.delivery_mode,
-			entry.vector
-		);
-	}
-	}
-	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
-		if (!entry)
-			continue;
-		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
-		for (;;) {
-			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = entry->next;
-		}
-		printk("\n");
-	}
-
-	printk(KERN_INFO ".................................... done.\n");
-
-	return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-	unsigned int v;
-	int i, j;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int v, ver, maxlvt;
-	u64 icr;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-	printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-	on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-	print_PIC();
-	print_all_local_APICs();
-	print_IO_APIC();
-
-	return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-void __init enable_IO_APIC(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	int i8259_apic, i8259_pin;
-	int apic;
-	unsigned long flags;
-
-#ifdef CONFIG_X86_32
-	int i;
-	if (!pirqs_enabled)
-		for (i = 0; i < MAX_PIRQS; i++)
-			pirq_entries[i] = -1;
-#endif
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-	for(apic = 0; apic < nr_ioapics; apic++) {
-		int pin;
-		/* See if any of the pins is in ExtINT mode */
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-			entry = ioapic_read_entry(apic, pin);
-
-			/* If the interrupt line is enabled and in ExtInt mode
-			 * I have found the pin where the i8259 is connected.
-			 */
-			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-				ioapic_i8259.apic = apic;
-				ioapic_i8259.pin  = pin;
-				goto found_i8259;
-			}
-		}
-	}
- found_i8259:
-	/* Look to see what if the MP table has reported the ExtINT */
-	/* If we could not find the appropriate pin by looking at the ioapic
-	 * the i8259 probably is not connected the ioapic but give the
-	 * mptable a chance anyway.
-	 */
-	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-	/* Trust the MP table if nothing is setup in the hardware */
-	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-		ioapic_i8259.pin  = i8259_pin;
-		ioapic_i8259.apic = i8259_apic;
-	}
-	/* Complain if the MP table and the hardware disagree */
-	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-	{
-		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-	}
-
-	/*
-	 * Do not trust the IO-APIC being empty at bootup
-	 */
-	clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-	/*
-	 * Clear the IO-APIC before rebooting:
-	 */
-	clear_IO_APIC();
-
-	/*
-	 * If the i8259 is routed through an IOAPIC
-	 * Put that IOAPIC in virtual wire mode
-	 * so legacy interrupts can be delivered.
-	 */
-	if (ioapic_i8259.pin != -1) {
-		struct IO_APIC_route_entry entry;
-
-		memset(&entry, 0, sizeof(entry));
-		entry.mask            = 0; /* Enabled */
-		entry.trigger         = 0; /* Edge */
-		entry.irr             = 0;
-		entry.polarity        = 0; /* High */
-		entry.delivery_status = 0;
-		entry.dest_mode       = 0; /* Physical */
-		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-		entry.vector          = 0;
-		entry.dest            = read_apic_id();
-
-		/*
-		 * Add it to the IO-APIC irq-routing table:
-		 */
-		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-	}
-
-	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-	union IO_APIC_reg_00 reg_00;
-	physid_mask_t phys_id_present_map;
-	int apic;
-	int i;
-	unsigned char old_id;
-	unsigned long flags;
-
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-		return;
-
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
-	/*
-	 * This is broken; anything with a real cpu count has to
-	 * circumvent this idiocy regardless.
-	 */
-	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	/*
-	 * Set the IOAPIC ID to the value stored in the MPC table.
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-		/* Read the register 0 value */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		old_id = mp_ioapics[apic].mp_apicid;
-
-		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				reg_00.bits.ID);
-			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
-		}
-
-		/*
-		 * Sanity check, is the ID really free? Every APIC in a
-		 * system must have a unique ID or we get lots of nice
-		 * 'stuck on smp_invalidate_needed IPI wait' messages.
-		 */
-		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mp_apicid)) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			for (i = 0; i < get_physical_broadcast(); i++)
-				if (!physid_isset(i, phys_id_present_map))
-					break;
-			if (i >= get_physical_broadcast())
-				panic("Max APIC ID exceeded!\n");
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				i);
-			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mp_apicid = i;
-		} else {
-			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
-			apic_printk(APIC_VERBOSE, "Setting %d in the "
-					"phys_id_present_map\n",
-					mp_ioapics[apic].mp_apicid);
-			physids_or(phys_id_present_map, phys_id_present_map, tmp);
-		}
-
-
-		/*
-		 * We need to adjust the IRQ routing table
-		 * if the ID changed.
-		 */
-		if (old_id != mp_ioapics[apic].mp_apicid)
-			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mp_dstapic == old_id)
-					mp_irqs[i].mp_dstapic
-						= mp_ioapics[apic].mp_apicid;
-
-		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
-		 */
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mp_apicid);
-
-		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
-		spin_lock_irqsave(&ioapic_lock, flags);
-
-		/*
-		 * Sanity check
-		 */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
-			printk("could not set ID!\n");
-		else
-			apic_printk(APIC_VERBOSE, " ok.\n");
-	}
-}
-#endif
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *	- timer IRQ defaults to IO-APIC IRQ
- *	- if this function detects that timer IRQs are defunct, then we fall
- *	  back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-	unsigned long t1 = jiffies;
-	unsigned long flags;
-
-	if (no_timer_check)
-		return 1;
-
-	local_save_flags(flags);
-	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
-	local_irq_restore(flags);
-
-	/*
-	 * Expect a few ticks at least, to be sure some possible
-	 * glue logic does not lock up after one or two first
-	 * ticks in a non-ExtINT mode.  Also the local APIC
-	 * might have cached one ExtINT interrupt.  Finally, at
-	 * least one tick may be lost due to delays.
-	 */
-
-	/* jiffies wrap? */
-	if (time_after(jiffies, t1 + 4))
-		return 1;
-	return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-	int was_pending = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
-		disable_8259A_irq(irq);
-		if (i8259A_irq_pending(irq))
-			was_pending = 1;
-	}
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return was_pending;
-}
-
-#ifdef CONFIG_X86_64
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-        send_IPI_self(irq_cfg(irq)->vector);
-
-        return 1;
-}
-#endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_INTR_REMAP
-static void ir_irq_migration(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For edge triggered, irq migration is a simple atomic update(of vector
- * and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we need to modify the io-apic RTE aswell with the update
- * vector information, along with modifying IRTE with vector and destination.
- * So irq migration for level triggered is little  bit more complex compared to
- * edge triggered migration. But the good news is, we use the same algorithm
- * for level triggered migration as we have today, only difference being,
- * we now initiate the irq migration from process context instead of the
- * interrupt context.
- *
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
- * suppression) to the IO-APIC, level triggered irq migration will also be
- * as simple as edge triggered migration and we can do the irq migration
- * with a simple atomic update to IO-APIC RTE.
- */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct irq_desc *desc;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	int modify_ioapic_rte;
-	unsigned int dest;
-	unsigned long flags;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	desc = irq_to_desc(irq);
-	modify_ioapic_rte = desc->status & IRQ_LEVEL;
-	if (modify_ioapic_rte) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-	}
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
-	 */
-	modify_irte(irq, &irte);
-
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc->affinity = mask;
-}
-
-static int migrate_irq_remapped_level(int irq)
-{
-	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	mask_IO_APIC_irq(irq);
-
-	if (io_apic_level_ack_pending(irq)) {
-		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
-		 * vector information in the IO-APIC RTE and that will confuse
-		 * the EOI broadcast performed by cpu.
-		 * So, delay the irq migration to the next instance.
-		 */
-		schedule_delayed_work(&ir_migration_work, 1);
-		goto unmask;
-	}
-
-	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
-
-	ret = 0;
-	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
-
-unmask:
-	unmask_IO_APIC_irq(irq);
-	return ret;
-}
-
-static void ir_irq_migration(struct work_struct *work)
-{
-	unsigned int irq;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		if (desc->status & IRQ_MOVE_PENDING) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&desc->lock, flags);
-			if (!desc->chip->set_affinity ||
-			    !(desc->status & IRQ_MOVE_PENDING)) {
-				desc->status &= ~IRQ_MOVE_PENDING;
-				spin_unlock_irqrestore(&desc->lock, flags);
-				continue;
-			}
-
-			desc->chip->set_affinity(irq, desc->pending_mask);
-			spin_unlock_irqrestore(&desc->lock, flags);
-		}
-	}
-}
-
-/*
- * Migrates the IRQ destination in the process context.
- */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	if (desc->status & IRQ_LEVEL) {
-		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
-		return;
-	}
-
-	migrate_ioapic_irq(irq, mask);
-}
-#endif
-
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-	ack_APIC_irq();
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
-
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
-			goto unlock;
-
-		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
-unlock:
-		spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void irq_complete_move(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned vector, me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	vector = ~get_irq_regs()->orig_ax;
-	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-}
-#else
-static inline void irq_complete_move(unsigned int irq) {}
-#endif
-#ifdef CONFIG_INTR_REMAP
-static void ack_x2apic_level(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-#ifdef CONFIG_X86_64
-static void ack_apic_level(unsigned int irq)
-{
-	int do_unmask_irq = 0;
-
-	irq_complete_move(irq);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
-		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
-	}
-#endif
-
-	/*
-	 * We must acknowledge the irq before we move it or the acknowledge will
-	 * not propagate properly.
-	 */
-	ack_APIC_irq();
-
-	/* Now we can move and renable the irq */
-	if (unlikely(do_unmask_irq)) {
-		/* Only migrate the irq if the ack has been received.
-		 *
-		 * On rare occasions the broadcast level triggered ack gets
-		 * delayed going to ioapics, and if we reprogram the
-		 * vector while Remote IRR is still set the irq will never
-		 * fire again.
-		 *
-		 * To prevent this scenario we read the Remote IRR bit
-		 * of the ioapic.  This has two effects.
-		 * - On any sane system the read of the ioapic will
-		 *   flush writes (and acks) going to the ioapic from
-		 *   this cpu.
-		 * - We get to see if the ACK has actually been delivered.
-		 *
-		 * Based on failed experiments of reprogramming the
-		 * ioapic entry from outside of irq context starting
-		 * with masking the ioapic entry and then polling until
-		 * Remote IRR was clear before reprogramming the
-		 * ioapic I don't trust the Remote IRR bit to be
-		 * completey accurate.
-		 *
-		 * However there appears to be no other way to plug
-		 * this race, so if the Remote IRR bit is not
-		 * accurate and is causing problems then it is a hardware bug
-		 * and you can go talk to the chipset vendor about it.
-		 */
-		if (!io_apic_level_ack_pending(irq))
-			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
-	}
-}
-#else
-atomic_t irq_mis_count;
-static void ack_apic_level(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-#endif
-
-static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-#endif
-
-static inline void init_IO_APIC_traps(void)
-{
-	int irq;
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	for_each_irq_cfg(cfg) {
-		irq = cfg->irq;
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
-			/*
-			 * Hmm.. We don't have an entry for this,
-			 * so default to an old-fashioned 8259
-			 * interrupt if we can..
-			 */
-			if (irq < 16)
-				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
-				/* Strange. Oh, well.. */
-				desc->chip = &no_irq_chip;
-			}
-		}
-	}
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void mask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-	.name		= "local-APIC",
-	.mask		= mask_lapic_irq,
-	.unmask		= unmask_lapic_irq,
-	.ack		= ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	desc->status &= ~IRQ_LEVEL;
-	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-				      "edge");
-}
-
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-	int apic, pin, i;
-	struct IO_APIC_route_entry entry0, entry1;
-	unsigned char save_control, save_freq_select;
-
-	pin  = find_isa_irq_pin(8, mp_INT);
-	if (pin == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-	apic = find_isa_irq_apic(8, mp_INT);
-	if (apic == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	entry0 = ioapic_read_entry(apic, pin);
-	clear_IO_APIC_pin(apic, pin);
-
-	memset(&entry1, 0, sizeof(entry1));
-
-	entry1.dest_mode = 0;			/* physical delivery */
-	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest = hard_smp_processor_id();
-	entry1.delivery_mode = dest_ExtINT;
-	entry1.polarity = entry0.polarity;
-	entry1.trigger = 0;
-	entry1.vector = 0;
-
-	ioapic_write_entry(apic, pin, entry1);
-
-	save_control = CMOS_READ(RTC_CONTROL);
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-		   RTC_FREQ_SELECT);
-	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-	i = 100;
-	while (i-- > 0) {
-		mdelay(10);
-		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-			i -= 10;
-	}
-
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	clear_IO_APIC_pin(apic, pin);
-
-	ioapic_write_entry(apic, pin, entry0);
-}
-
-static int disable_timer_pin_1 __initdata;
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", disable_timer_pin_setup);
-
-int timer_through_8259 __initdata;
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
- */
-static inline void __init check_timer(void)
-{
-	struct irq_cfg *cfg = irq_cfg(0);
-	int apic1, pin1, apic2, pin2;
-	unsigned long flags;
-	unsigned int ver;
-	int no_pin1 = 0;
-
-	local_irq_save(flags);
-
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
-
-	/*
-	 * get/set the timer IRQ vector:
-	 */
-	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
-
-	/*
-	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.  Also
-	 * timer interrupts need to be acknowledged manually in
-	 * the 8259A for the i82489DX when using the NMI
-	 * watchdog as that APIC treats NMIs as level-triggered.
-	 * The AEOI mode will finish them in the 8259A
-	 * automatically.
-	 */
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-	init_8259A(1);
-#ifdef CONFIG_X86_32
-	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-#endif
-
-	pin1  = find_isa_irq_pin(0, mp_INT);
-	apic1 = find_isa_irq_apic(0, mp_INT);
-	pin2  = ioapic_i8259.pin;
-	apic2 = ioapic_i8259.apic;
-
-	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    cfg->vector, apic1, pin1, apic2, pin2);
-
-	/*
-	 * Some BIOS writers are clueless and report the ExtINTA
-	 * I/O APIC input from the cascaded 8259A as the timer
-	 * interrupt input.  So just in case, if only one pin
-	 * was found above, try it both directly and through the
-	 * 8259A.
-	 */
-	if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("BIOS bug: timer not connected to IO-APIC");
-#endif
-		pin1 = pin2;
-		apic1 = apic2;
-		no_pin1 = 1;
-	} else if (pin2 == -1) {
-		pin2 = pin1;
-		apic2 = apic1;
-	}
-
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-		}
-		unmask_IO_APIC_irq(0);
-		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			goto out;
-		}
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
-		clear_IO_APIC_pin(apic1, pin1);
-		if (!no_pin1)
-			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-				    "8254 timer not connected to IO-APIC\n");
-
-		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-			    "(IRQ0) through the 8259A ...\n");
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
-		enable_8259A_irq(0);
-		if (timer_irq_works()) {
-			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			goto out;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		disable_8259A_irq(0);
-		clear_IO_APIC_pin(apic2, pin2);
-		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-	}
-
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as Virtual Wire IRQ...\n");
-
-	lapic_register_intr(0);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
-	enable_8259A_irq(0);
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as ExtINT IRQ...\n");
-
-	init_8259A(0);
-	make_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-	unlock_ExtINT_logic();
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-		"report.  Then try booting with the 'noapic' option.\n");
-out:
-	local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-
-#ifdef CONFIG_X86_32
-	enable_IO_APIC();
-#else
-	/*
-	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
-	 */
-#endif
-
-	io_apic_irqs = ~PIC_IRQS;
-
-	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
-         * Set up IO-APIC IRQ routing.
-         */
-#ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
-#endif
-	sync_Arb_IDs();
-	setup_IO_APIC_irqs();
-	init_IO_APIC_traps();
-	check_timer();
-}
-
-/*
- *      Called after all the initialization is done. If we didnt find any
- *      APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
-
-	return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	unsigned long flags;
-	union IO_APIC_reg_00 reg_00;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
-
-	return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
-	.suspend = ioapic_suspend,
-	.resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-	struct sys_device * dev;
-	int i, size, error;
-
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
-
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-	}
-
-	return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-unsigned int create_irq_nr(unsigned int irq_want)
-{
-	/* Allocate an unused irq */
-	unsigned int irq;
-	unsigned int new;
-	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-	irq_want = nr_irqs - 1;
-#endif
-
-	irq = 0;
-	spin_lock_irqsave(&vector_lock, flags);
-	for (new = irq_want; new > 0; new--) {
-		if (platform_legacy_irq(new))
-			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
-			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
-			irq = new;
-		break;
-	}
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (irq > 0) {
-		dynamic_irq_init(irq);
-	}
-	return irq;
-}
-
-int create_irq(void)
-{
-	int irq;
-
-	irq = create_irq_nr(nr_irqs - 1);
-
-	if (irq == 0)
-		irq = -1;
-
-	return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	dynamic_irq_cleanup(irq);
-
-#ifdef CONFIG_INTR_REMAP
-	free_irte(irq);
-#endif
-	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
-	spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (err)
-		return err;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irte irte;
-		int ir_index;
-		u16 sub_handle;
-
-		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
-		BUG_ON(ir_index == -1);
-
-		memset (&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = 0; /* edge */
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = cfg->vector;
-		irte.dest_id = IRTE_DEST(dest);
-
-		modify_irte(irq, &irte);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->data = sub_handle;
-		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
-				  MSI_ADDR_IR_SHV |
-				  MSI_ADDR_IR_INDEX1(ir_index) |
-				  MSI_ADDR_IR_INDEX2(ir_index);
-	} else
-#endif
-	{
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-				MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
-
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(cfg->vector);
-	}
-	return err;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	read_msi_msg(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * atomically update the IRTE with the new destination and vector.
-	 */
-	modify_irte(irq, &irte);
-
-	/*
-	 * After this point, all the interrupts will start arriving
-	 * at the new destination. So, time to cleanup the previous
-	 * vector allocation.
-	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name		= "PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip msi_ir_chip = {
-	.name		= "IR-PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_x2apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= ir_set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
-	struct intel_iommu *iommu;
-	int index;
-
-	iommu = map_dev_to_ir(dev);
-	if (!iommu) {
-		printk(KERN_ERR
-		       "Unable to map PCI %s to iommu\n", pci_name(dev));
-		return -ENOENT;
-	}
-
-	index = alloc_irte(iommu, irq, nvec);
-	if (index < 0) {
-		printk(KERN_ERR
-		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
-		return -ENOSPC;
-	}
-	return index;
-}
-#endif
-
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(dev, irq, &msg);
-	if (ret < 0)
-		return ret;
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		/*
-		 * irq migration in process context
-		 */
-		desc->status |= IRQ_MOVE_PCNTXT;
-		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
-	} else
-#endif
-		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
-
-	return 0;
-}
-
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-	unsigned int irq;
-	int ret;
-	unsigned int irq_want;
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
-	irq = create_irq_nr(irq_want);
-	if (irq == 0)
-		return -1;
-
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled)
-		goto no_ir;
-
-	ret = msi_alloc_irte(dev, irq, 1);
-	if (ret < 0)
-		goto error;
-no_ir:
-#endif
-	ret = setup_msi_irq(dev, desc, irq);
-	if (ret < 0) {
-		destroy_irq(irq);
-		return ret;
-	}
-	return 0;
-
-#ifdef CONFIG_INTR_REMAP
-error:
-	destroy_irq(irq);
-	return ret;
-#endif
-}
-
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	unsigned int irq;
-	int ret, sub_handle;
-	struct msi_desc *desc;
-	unsigned int irq_want;
-
-#ifdef CONFIG_INTR_REMAP
-	struct intel_iommu *iommu = 0;
-	int index = 0;
-#endif
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
-		if (irq == 0)
-			return -1;
-#ifdef CONFIG_INTR_REMAP
-		if (!intr_remapping_enabled)
-			goto no_ir;
-
-		if (!sub_handle) {
-			/*
-			 * allocate the consecutive block of IRTE's
-			 * for 'nvec'
-			 */
-			index = msi_alloc_irte(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			iommu = map_dev_to_ir(dev);
-			if (!iommu) {
-				ret = -ENOENT;
-				goto error;
-			}
-			/*
-			 * setup the mapping between the irq and the IRTE
-			 * base index, the sub_handle pointing to the
-			 * appropriate interrupt remap table entry.
-			 */
-			set_irte_irq(irq, iommu, index, sub_handle);
-		}
-no_ir:
-#endif
-		ret = setup_msi_irq(dev, desc, irq);
-		if (ret < 0)
-			goto error;
-		sub_handle++;
-	}
-	return 0;
-
-error:
-	destroy_irq(irq);
-	return ret;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-	destroy_irq(irq);
-}
-
-#ifdef CONFIG_DMAR
-#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-struct irq_chip dmar_msi_type = {
-	.name = "DMAR_MSI",
-	.unmask = dmar_msi_unmask,
-	.mask = dmar_msi_mask,
-	.ack = ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity = dmar_msi_set_affinity,
-#endif
-	.retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-		"edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
-	.name		= "PCI-HT",
-	.mask		= mask_ht_irq,
-	.unmask		= unmask_ht_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_ht_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	int err;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		struct ht_irq_msg msg;
-		unsigned dest;
-
-		cfg = irq_cfg(irq);
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
-
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(cfg->vector) |
-			((INT_DEST_MODE == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
-
-		write_ht_irq_msg(irq, &msg);
-
-		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
-	}
-	return err;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-	union IO_APIC_reg_00 reg_00;
-	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-	physid_mask_t tmp;
-	unsigned long flags;
-	int i = 0;
-
-	/*
-	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
-	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
-	 * supports up to 16 on one shared APIC bus.
-	 *
-	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-	 *      advantage of new APIC bus architecture.
-	 */
-
-	if (physids_empty(apic_id_map))
-		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	if (apic_id >= get_physical_broadcast()) {
-		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-			"%d\n", ioapic, apic_id, reg_00.bits.ID);
-		apic_id = reg_00.bits.ID;
-	}
-
-	/*
-	 * Every APIC in a system must have a unique ID or we get lots of nice
-	 * 'stuck on smp_invalidate_needed IPI wait' messages.
-	 */
-	if (check_apicid_used(apic_id_map, apic_id)) {
-
-		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!check_apicid_used(apic_id_map, i))
-				break;
-		}
-
-		if (i == get_physical_broadcast())
-			panic("Max apic_id exceeded!\n");
-
-		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-			"trying %d\n", ioapic, apic_id, i);
-
-		apic_id = i;
-	}
-
-	tmp = apicid_to_cpu_present(apic_id);
-	physids_or(apic_id_map, apic_id_map, tmp);
-
-	if (reg_00.bits.ID != apic_id) {
-		reg_00.bits.ID = apic_id;
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/* Sanity check */
-		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-			return -1;
-		}
-	}
-
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-	return apic_id;
-}
-
-int __init io_apic_get_version(int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.version;
-}
-#endif
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
-
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
-
-	return 0;
-}
-
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-	int i;
-
-	if (skip_ioapic_setup)
-		return -1;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == mp_INT &&
-		    mp_irqs[i].mp_srcbusirq == bus_irq)
-			break;
-	if (i >= mp_irq_entries)
-		return -1;
-
-	*trigger = irq_trigger(i);
-	*polarity = irq_polarity(i);
-	return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-	struct irq_cfg *cfg;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			cfg = irq_cfg(irq);
-			if (!cfg->vector)
-				setup_IO_APIC_irq(ioapic, pin, irq,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-#ifdef CONFIG_INTR_REMAP
-			else if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
-			else
-				set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
-#ifdef CONFIG_X86_64
-#define IOAPIC_RESOURCE_NAME_SIZE 11
-
-static struct resource *ioapic_resources;
-
-static struct resource * __init ioapic_setup_resources(void)
-{
-	unsigned long n;
-	struct resource *res;
-	char *mem;
-	int i;
-
-	if (nr_ioapics <= 0)
-		return NULL;
-
-	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
-	n *= nr_ioapics;
-
-	mem = alloc_bootmem(n);
-	res = (void *)mem;
-
-	if (mem != NULL) {
-		mem += sizeof(struct resource) * nr_ioapics;
-
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
-	}
-
-	ioapic_resources = res;
-
-	return res;
-}
-#endif
-
-void __init ioapic_init_mappings(void)
-{
-	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
-#ifdef CONFIG_X86_64
-	struct resource *ioapic_res;
-
-	ioapic_res = ioapic_setup_resources();
-#endif
-	for (i = 0; i < nr_ioapics; i++) {
-		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-#ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
-#endif
-		} else {
-#ifdef CONFIG_X86_32
-fake_ioapic_page:
-#endif
-			ioapic_phys = (unsigned long)
-				alloc_bootmem_pages(PAGE_SIZE);
-			ioapic_phys = __pa(ioapic_phys);
-		}
-		set_fixmap_nocache(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %08lx (%08lx)\n",
-			    __fix_to_virt(idx), ioapic_phys);
-		idx++;
-
-#ifdef CONFIG_X86_64
-		if (ioapic_res != NULL) {
-			ioapic_res->start = ioapic_phys;
-			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-			ioapic_res++;
-		}
-#endif
-	}
-}
-
-#ifdef CONFIG_X86_64
-static int __init ioapic_insert_resources(void)
-{
-	int i;
-	struct resource *r = ioapic_resources;
-
-	if (!r) {
-		printk(KERN_ERR
-		       "IO APIC resources could be not be allocated.\n");
-		return -1;
-	}
-
-	for (i = 0; i < nr_ioapics; i++) {
-		insert_resource(&iomem_resource, r);
-		r++;
-	}
-
-	return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
deleted file mode 100644
index 940c4167b325..000000000000
--- a/arch/x86/kernel/io_apic_64.c
+++ /dev/null
@@ -1,3913 +0,0 @@
-/*
- *	Intel IO-APIC support for multi-Pentium hosts.
- *
- *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- *	Many thanks to Stig Venaas for trying out countless experimental
- *	patches and reporting/debugging problems patiently!
- *
- *	(c) 1999, Multiple IO-APIC support, developed by
- *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *	further tested and cleaned up by Zach Brown <zab@redhat.com>
- *	and Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively
- *	Paul Diefenbaugh	:	Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>	/* time_after() */
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/dmar.h>
-
-#include <asm/idle.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-#include <asm/irq_remapping.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-
-#define __apicdebuginit(type) static type __init
-
-/*
- *      Is the SiS APIC rmw bug present ?
- *      -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-int skip_ioapic_setup;
-
-static int __init parse_noapic(char *str)
-{
-	/* disable IO-APIC */
-	disable_ioapic_setup();
-	return 0;
-}
-early_param("noapic", parse_noapic);
-
-struct irq_cfg;
-struct irq_pin_list;
-struct irq_cfg {
-	unsigned int irq;
-	struct irq_cfg *next;
-	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-static struct irq_cfg *irq_cfgx_free;
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_cfg *cfg;
-	int legacy_count;
-	int i;
-
-	cfg = *da->name;
-
-	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-	for (i = legacy_count; i < *da->nr; i++)
-		init_one_irq_cfg(&cfg[i]);
-
-	for (i = 1; i < *da->nr; i++)
-		cfg[i-1].next = &cfg[i];
-
-	irq_cfgx_free = &irq_cfgx[legacy_count];
-	irq_cfgx[legacy_count - 1].next = NULL;
-}
-
-#define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg = cfg->next;
-	}
-
-	return NULL;
-}
-
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	struct irq_cfg *cfg, *cfg_pri;
-	int i;
-	int count = 0;
-
-	cfg_pri = cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg_pri = cfg;
-		cfg = cfg->next;
-		count++;
-	}
-
-	if (!irq_cfgx_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-		if (after_bootmem)
-			cfg = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!cfg)
-			panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_cfg; i++)
-			init_one_irq_cfg(&cfg[i]);
-
-		for (i = 1; i < nr_irq_cfg; i++)
-			cfg[i-1].next = &cfg[i];
-
-		irq_cfgx_free = cfg;
-	}
-
-	cfg = irq_cfgx_free;
-	irq_cfgx_free = irq_cfgx_free->next;
-	cfg->next = NULL;
-	if (cfg_pri)
-		cfg_pri->next = cfg;
-	else
-		irq_cfgx = cfg;
-	cfg->irq = irq;
-	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_cfg *cfg;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_cfg);
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-		for_each_irq_cfg(cfg) {
-			phys = __pa(cfg);
-			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
-	return cfg;
-}
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
-static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = *da->name;
-
-	for (i = 1; i < *da->nr; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = &pin[0];
-}
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
-
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
-{
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = irq_2_pin_ptr;
-
-	if (pin) {
-		irq_2_pin_ptr = pin->next;
-		pin->next = NULL;
-		return pin;
-	}
-
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-	if (after_bootmem)
-		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-				 GFP_ATOMIC);
-	else
-		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-				nr_irq_2_pin, PAGE_SIZE, 0);
-
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
-
-	for (i = 1; i < nr_irq_2_pin; i++)
-		pin[i-1].next = &pin[i];
-
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-
-	return pin;
-}
-
-struct io_apic {
-	unsigned int index;
-	unsigned int unused[3];
-	unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
-	writel(value, &io_apic->data);
-}
-
-#ifdef CONFIG_X86_64
-static bool io_apic_level_ack_pending(unsigned int irq)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-		int pin;
-
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		/* Is the remote IRR bit set? */
-		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-			spin_unlock_irqrestore(&ioapic_lock, flags);
-			return true;
-		}
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return false;
-}
-#endif
-
-union entry_union {
-	struct { u32 w1, w2; };
-	struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-	union entry_union eu;
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	union entry_union eu;
-	eu.entry = e;
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__ioapic_write_entry(apic, pin, e);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-	unsigned long flags;
-	union entry_union eu = { .entry.mask = 1 };
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	int apic, pin;
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-#ifdef CONFIG_INTR_REMAP
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-#else
-		io_apic_write(apic, 0x11 + pin*2, dest);
-#endif
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
-
-	desc = irq_to_desc(irq);
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif /* CONFIG_SMP */
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
-	entry = cfg->irq_2_pin;
-	if (!entry) {
-		entry = get_one_free_irq_2_pin();
-		cfg->irq_2_pin = entry;
-		entry->apic = apic;
-		entry->pin = pin;
-		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-		return;
-	}
-
-	while (entry->next) {
-		/* not again, please */
-		if (entry->apic == apic && entry->pin == pin)
-			return;
-
-		entry = entry->next;
-	}
-
-	entry->next = get_one_free_irq_2_pin();
-	entry = entry->next;
-	entry->apic = apic;
-	entry->pin = pin;
-	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct irq_pin_list *entry = cfg->irq_2_pin;
-	int replaced = 0;
-
-	while (entry) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-			replaced = 1;
-			/* every one is different, right? */
-			break;
-		}
-		entry = entry->next;
-	}
-
-	/* why? call replace before add? */
-	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
-		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
-#define DO_ACTION(name,R,ACTION, FINAL)					\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
-
-/* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
-
-#else
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-	unsigned int pin, reg;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
-
-#endif
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-	struct IO_APIC_route_entry entry;
-
-	/* Check delivery_mode to be sure we're not clearing an SMI pin */
-	entry = ioapic_read_entry(apic, pin);
-	if (entry.delivery_mode == dest_SMI)
-		return;
-	/*
-	 * Disable it in the IO-APIC irq-routing table:
-	 */
-	ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC (void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			clear_IO_APIC_pin(apic, pin);
-}
-
-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
-void send_IPI_self(int vector)
-{
-	unsigned int cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP && CONFIG_X86_32*/
-
-#ifdef CONFIG_X86_32
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-
-static int __init ioapic_pirq_setup(char *str)
-{
-	int i, max;
-	int ints[MAX_PIRQS+1];
-
-	get_options(str, ARRAY_SIZE(ints), ints);
-
-	for (i = 0; i < MAX_PIRQS; i++)
-		pirq_entries[i] = -1;
-
-	pirqs_enabled = 1;
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"PIRQ redirection, working around broken MP-BIOS.\n");
-	max = MAX_PIRQS;
-	if (ints[0] < MAX_PIRQS)
-		max = ints[0];
-
-	for (i = 0; i < max; i++) {
-		apic_printk(APIC_VERBOSE, KERN_DEBUG
-				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-		/*
-		 * PIRQs are mapped upside down, usually.
-		 */
-		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-	}
-	return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/*
- * Saves and masks all the unmasked IO-APIC RTE's
- */
-int save_mask_IO_APIC_setup(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	unsigned long flags;
-	int apic, pin;
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		early_ioapic_entries[apic] =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				nr_ioapic_registers[apic], GFP_KERNEL);
-		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
-	}
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-
-			entry = early_ioapic_entries[apic][pin] =
-				ioapic_read_entry(apic, pin);
-			if (!entry.mask) {
-				entry.mask = 1;
-				ioapic_write_entry(apic, pin, entry);
-			}
-		}
-	return 0;
-}
-
-void restore_IO_APIC_setup(void)
-{
-	int apic, pin;
-
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			ioapic_write_entry(apic, pin,
-					   early_ioapic_entries[apic][pin]);
-}
-
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup();
-}
-#endif
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mp_dstirq == pin)
-			return i;
-
-	return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-
-			return mp_irqs[i].mp_dstirq;
-	}
-	return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
-			break;
-	}
-	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
-				return apic;
-		}
-	}
-
-	return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mp_irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-	if (irq < 16) {
-		unsigned int port = 0x4d0 + (irq >> 3);
-		return (inb(port) >> (irq & 7)) & 1;
-	}
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"Broken MPtable reports ISA irq %d\n", irq);
-	return 0;
-}
-
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)	(0)
-#define default_ISA_polarity(idx)	(0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)	(1)
-#define default_PCI_polarity(idx)	(1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int polarity;
-
-	/*
-	 * Determine IRQ line polarity (high active or low active):
-	 */
-	switch (mp_irqs[idx].mp_irqflag & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent polarity */
-			if (test_bit(bus, mp_bus_not_pci))
-				polarity = default_ISA_polarity(idx);
-			else
-				polarity = default_PCI_polarity(idx);
-			break;
-		case 1: /* high active */
-		{
-			polarity = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-		case 3: /* low active */
-		{
-			polarity = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			polarity = 1;
-			break;
-		}
-	}
-	return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-	int bus = mp_irqs[idx].mp_srcbus;
-	int trigger;
-
-	/*
-	 * Determine IRQ trigger mode (edge or level sensitive):
-	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
-	{
-		case 0: /* conforms, ie. bus-type dependent */
-			if (test_bit(bus, mp_bus_not_pci))
-				trigger = default_ISA_trigger(idx);
-			else
-				trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-			switch (mp_bus_id_to_type[bus]) {
-				case MP_BUS_ISA: /* ISA pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_EISA: /* EISA pin */
-				{
-					trigger = default_EISA_trigger(idx);
-					break;
-				}
-				case MP_BUS_PCI: /* PCI pin */
-				{
-					/* set before the switch */
-					break;
-				}
-				case MP_BUS_MCA: /* MCA pin */
-				{
-					trigger = default_MCA_trigger(idx);
-					break;
-				}
-				default:
-				{
-					printk(KERN_WARNING "broken BIOS!!\n");
-					trigger = 1;
-					break;
-				}
-			}
-#endif
-			break;
-		case 1: /* edge */
-		{
-			trigger = 0;
-			break;
-		}
-		case 2: /* reserved */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 1;
-			break;
-		}
-		case 3: /* level */
-		{
-			trigger = 1;
-			break;
-		}
-		default: /* invalid */
-		{
-			printk(KERN_WARNING "broken BIOS!!\n");
-			trigger = 0;
-			break;
-		}
-	}
-	return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-	return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-	return MPBIOS_trigger(idx);
-}
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-static int pin_2_irq(int idx, int apic, int pin)
-{
-	int irq, i;
-	int bus = mp_irqs[idx].mp_srcbus;
-
-	/*
-	 * Debugging check, we are in big trouble if this message pops up!
-	 */
-	if (mp_irqs[idx].mp_dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-	if (test_bit(bus, mp_bus_not_pci)) {
-		irq = mp_irqs[idx].mp_srcbusirq;
-	} else {
-		/*
-		 * PCI IRQs are mapped in order
-		 */
-		i = irq = 0;
-		while (i < apic)
-			irq += nr_ioapic_registers[i++];
-		irq += pin;
-                /*
-                 * For MPS mode, so far only needed by ES7000 platform
-                 */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
-	}
-
-#ifdef CONFIG_X86_32
-	/*
-	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
-	 */
-	if ((pin >= 16) && (pin <= 23)) {
-		if (pirq_entries[pin-16] != -1) {
-			if (!pirq_entries[pin-16]) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"disabling PIRQ%d\n", pin-16);
-			} else {
-				irq = pirq_entries[pin-16];
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"using PIRQ%d -> IRQ %d\n",
-						pin-16, irq);
-			}
-		}
-	}
-#endif
-
-	return irq;
-}
-
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	spin_unlock(&vector_lock);
-}
-
-static int __assign_irq_vector(int irq, cpumask_t mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	unsigned int old_vector;
-	int cpu;
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfg(irq);
-
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
-	old_vector = cfg->vector;
-	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
-			return 0;
-	}
-
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
-		int new_cpu;
-		int vector, offset;
-
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 8;
-		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
-			offset = (offset + 1) % 8;
-			vector = FIRST_DEVICE_VECTOR + offset;
-		}
-		if (unlikely(current_vector == vector))
-			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
-			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
-			goto next;
-#endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-				goto next;
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (old_vector) {
-			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
-		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cfg->domain = domain;
-		return 0;
-	}
-	return -ENOSPC;
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask)
-{
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
-	spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq)
-{
-	struct irq_cfg *cfg;
-	cpumask_t mask;
-	int cpu, vector;
-
-	cfg = irq_cfg(irq);
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
-		per_cpu(vector_irq, cpu)[vector] = -1;
-
-	cfg->vector = 0;
-	cpus_clear(cfg->domain);
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	/* This function must be called with vector_lock held */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/* Mark the inuse vectors */
-	for_each_irq_cfg(cfg) {
-		if (!cpu_isset(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		irq = cfg->irq;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq < 0)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = -1;
-	}
-}
-
-static struct irq_chip ioapic_chip;
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip;
-#endif
-
-#define IOAPIC_AUTO     -1
-#define IOAPIC_EDGE     0
-#define IOAPIC_LEVEL    1
-
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
-        int apic, idx, pin;
-
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
-         * nonexistent IRQs are edge default
-         */
-        return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
-	return 1;
-}
-#endif
-
-static void ioapic_register_intr(int irq, unsigned long trigger)
-{
-	struct irq_desc *desc;
-
-	/* first time to use this irq_desc */
-	if (irq < 16)
-		desc = irq_to_desc(irq);
-	else
-		desc = irq_to_desc_alloc(irq);
-
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		desc->status |= IRQ_LEVEL;
-	else
-		desc->status &= ~IRQ_LEVEL;
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		desc->status |= IRQ_MOVE_PCNTXT;
-		if (trigger)
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_fasteoi_irq,
-						     "fasteoi");
-		else
-			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-						      handle_edge_irq, "edge");
-		return;
-	}
-#endif
-	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-	    trigger == IOAPIC_LEVEL)
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq,
-					      "fasteoi");
-	else
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_edge_irq, "edge");
-}
-
-static int setup_ioapic_entry(int apic, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector)
-{
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
-
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic);
-
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic);
-
-		memset(&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = trigger;
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = vector;
-		irte.dest_id = IRTE_DEST(destination);
-
-		modify_irte(irq, &irte);
-
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-	} else
-#endif
-	{
-		entry->delivery_mode = INT_DELIVERY_MODE;
-		entry->dest_mode = INT_DEST_MODE;
-		entry->dest = destination;
-	}
-
-	entry->mask = 0;				/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
-	entry->vector = vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry->mask = 1;
-	return 0;
-}
-
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
-			      int trigger, int polarity)
-{
-	struct irq_cfg *cfg;
-	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
-
-	if (!IO_APIC_IRQ(irq))
-		return;
-
-	cfg = irq_cfg(irq);
-
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cpus_and(mask, cfg->domain, mask);
-
-	apic_printk(APIC_VERBOSE,KERN_DEBUG
-		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
-		    "IRQ %d Mode:%i Active:%i)\n",
-		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
-		    irq, trigger, polarity);
-
-
-	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
-			       cfg->vector)) {
-		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
-		return;
-	}
-
-	ioapic_register_intr(irq, trigger);
-	if (irq < 16)
-		disable_8259A_irq(irq);
-
-	ioapic_write_entry(apic, pin, entry);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-	int apic, pin, idx, irq, first_notcon = 1;
-
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
-
-		irq = pin_2_irq(idx, apic, pin);
-#ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
-#endif
-		add_pin_to_irq(irq, apic, pin);
-
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
-	}
-
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
-					int vector)
-{
-	struct IO_APIC_route_entry entry;
-
-#ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled)
-		return;
-#endif
-
-	memset(&entry, 0, sizeof(entry));
-
-	/*
-	 * We use logical delivery to get the timer IRQ
-	 * to the first CPU.
-	 */
-	entry.dest_mode = INT_DEST_MODE;
-	entry.mask = 1;					/* mask IRQ now */
-	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.polarity = 0;
-	entry.trigger = 0;
-	entry.vector = vector;
-
-	/*
-	 * The timer IRQ doesn't have to know that behind the
-	 * scene we may have a 8259A-master in AEOI mode ...
-	 */
-	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
-	/*
-	 * Add it to the IO-APIC irq-routing table:
-	 */
-	ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-	int apic, i;
-	union IO_APIC_reg_00 reg_00;
-	union IO_APIC_reg_01 reg_01;
-	union IO_APIC_reg_02 reg_02;
-	union IO_APIC_reg_03 reg_03;
-	unsigned long flags;
-	struct irq_cfg *cfg;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
-	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
-	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
-	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
-	 * but the value of reg_02 is read as the previous read register
-	 * value, so ignore it if reg_02 == reg_01.
-	 */
-	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-	}
-
-	/*
-	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
-	 * or reg_03, but the value of reg_0[23] is read as the previous read
-	 * register value, so ignore it if reg_03 == reg_0[12].
-	 */
-	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-	    reg_03.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-	}
-
-	printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-	printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
-			  " Stat Dmod Deli Vect:   \n");
-
-	for (i = 0; i <= reg_01.bits.entries; i++) {
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		printk(KERN_DEBUG " %02x %03X ",
-			i,
-			entry.dest
-		);
-
-		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-			entry.mask,
-			entry.trigger,
-			entry.irr,
-			entry.polarity,
-			entry.delivery_status,
-			entry.dest_mode,
-			entry.delivery_mode,
-			entry.vector
-		);
-	}
-	}
-	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
-		if (!entry)
-			continue;
-		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
-		for (;;) {
-			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = entry->next;
-		}
-		printk("\n");
-	}
-
-	printk(KERN_INFO ".................................... done.\n");
-
-	return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-	unsigned int v;
-	int i, j;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int v, ver, maxlvt;
-	u64 icr;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-	printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-	on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-	print_PIC();
-	print_all_local_APICs();
-	print_IO_APIC();
-
-	return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-void __init enable_IO_APIC(void)
-{
-	union IO_APIC_reg_01 reg_01;
-	int i8259_apic, i8259_pin;
-	int apic;
-	unsigned long flags;
-
-#ifdef CONFIG_X86_32
-	int i;
-	if (!pirqs_enabled)
-		for (i = 0; i < MAX_PIRQS; i++)
-			pirq_entries[i] = -1;
-#endif
-
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
-	for(apic = 0; apic < nr_ioapics; apic++) {
-		int pin;
-		/* See if any of the pins is in ExtINT mode */
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-			entry = ioapic_read_entry(apic, pin);
-
-			/* If the interrupt line is enabled and in ExtInt mode
-			 * I have found the pin where the i8259 is connected.
-			 */
-			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-				ioapic_i8259.apic = apic;
-				ioapic_i8259.pin  = pin;
-				goto found_i8259;
-			}
-		}
-	}
- found_i8259:
-	/* Look to see what if the MP table has reported the ExtINT */
-	/* If we could not find the appropriate pin by looking at the ioapic
-	 * the i8259 probably is not connected the ioapic but give the
-	 * mptable a chance anyway.
-	 */
-	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-	/* Trust the MP table if nothing is setup in the hardware */
-	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-		ioapic_i8259.pin  = i8259_pin;
-		ioapic_i8259.apic = i8259_apic;
-	}
-	/* Complain if the MP table and the hardware disagree */
-	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-	{
-		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-	}
-
-	/*
-	 * Do not trust the IO-APIC being empty at bootup
-	 */
-	clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-	/*
-	 * Clear the IO-APIC before rebooting:
-	 */
-	clear_IO_APIC();
-
-	/*
-	 * If the i8259 is routed through an IOAPIC
-	 * Put that IOAPIC in virtual wire mode
-	 * so legacy interrupts can be delivered.
-	 */
-	if (ioapic_i8259.pin != -1) {
-		struct IO_APIC_route_entry entry;
-
-		memset(&entry, 0, sizeof(entry));
-		entry.mask            = 0; /* Enabled */
-		entry.trigger         = 0; /* Edge */
-		entry.irr             = 0;
-		entry.polarity        = 0; /* High */
-		entry.delivery_status = 0;
-		entry.dest_mode       = 0; /* Physical */
-		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-		entry.vector          = 0;
-		entry.dest            = read_apic_id();
-
-		/*
-		 * Add it to the IO-APIC irq-routing table:
-		 */
-		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-	}
-
-	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-	union IO_APIC_reg_00 reg_00;
-	physid_mask_t phys_id_present_map;
-	int apic;
-	int i;
-	unsigned char old_id;
-	unsigned long flags;
-
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-		return;
-
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
-	/*
-	 * This is broken; anything with a real cpu count has to
-	 * circumvent this idiocy regardless.
-	 */
-	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	/*
-	 * Set the IOAPIC ID to the value stored in the MPC table.
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-
-		/* Read the register 0 value */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		old_id = mp_ioapics[apic].mp_apicid;
-
-		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				reg_00.bits.ID);
-			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
-		}
-
-		/*
-		 * Sanity check, is the ID really free? Every APIC in a
-		 * system must have a unique ID or we get lots of nice
-		 * 'stuck on smp_invalidate_needed IPI wait' messages.
-		 */
-		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mp_apicid)) {
-			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
-			for (i = 0; i < get_physical_broadcast(); i++)
-				if (!physid_isset(i, phys_id_present_map))
-					break;
-			if (i >= get_physical_broadcast())
-				panic("Max APIC ID exceeded!\n");
-			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-				i);
-			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mp_apicid = i;
-		} else {
-			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
-			apic_printk(APIC_VERBOSE, "Setting %d in the "
-					"phys_id_present_map\n",
-					mp_ioapics[apic].mp_apicid);
-			physids_or(phys_id_present_map, phys_id_present_map, tmp);
-		}
-
-
-		/*
-		 * We need to adjust the IRQ routing table
-		 * if the ID changed.
-		 */
-		if (old_id != mp_ioapics[apic].mp_apicid)
-			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mp_dstapic == old_id)
-					mp_irqs[i].mp_dstapic
-						= mp_ioapics[apic].mp_apicid;
-
-		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
-		 */
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mp_apicid);
-
-		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
-		spin_lock_irqsave(&ioapic_lock, flags);
-
-		/*
-		 * Sanity check
-		 */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
-			printk("could not set ID!\n");
-		else
-			apic_printk(APIC_VERBOSE, " ok.\n");
-	}
-}
-#endif
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-	no_timer_check = 1;
-	return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *	- timer IRQ defaults to IO-APIC IRQ
- *	- if this function detects that timer IRQs are defunct, then we fall
- *	  back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-	unsigned long t1 = jiffies;
-	unsigned long flags;
-
-	if (no_timer_check)
-		return 1;
-
-	local_save_flags(flags);
-	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
-	local_irq_restore(flags);
-
-	/*
-	 * Expect a few ticks at least, to be sure some possible
-	 * glue logic does not lock up after one or two first
-	 * ticks in a non-ExtINT mode.  Also the local APIC
-	 * might have cached one ExtINT interrupt.  Finally, at
-	 * least one tick may be lost due to delays.
-	 */
-
-	/* jiffies wrap? */
-	if (time_after(jiffies, t1 + 4))
-		return 1;
-	return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-	int was_pending = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
-		disable_8259A_irq(irq);
-		if (i8259A_irq_pending(irq))
-			was_pending = 1;
-	}
-	__unmask_IO_APIC_irq(irq);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return was_pending;
-}
-
-#ifdef CONFIG_X86_64
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-        send_IPI_self(irq_cfg(irq)->vector);
-
-        return 1;
-}
-#endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_INTR_REMAP
-static void ir_irq_migration(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For edge triggered, irq migration is a simple atomic update(of vector
- * and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we need to modify the io-apic RTE aswell with the update
- * vector information, along with modifying IRTE with vector and destination.
- * So irq migration for level triggered is little  bit more complex compared to
- * edge triggered migration. But the good news is, we use the same algorithm
- * for level triggered migration as we have today, only difference being,
- * we now initiate the irq migration from process context instead of the
- * interrupt context.
- *
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
- * suppression) to the IO-APIC, level triggered irq migration will also be
- * as simple as edge triggered migration and we can do the irq migration
- * with a simple atomic update to IO-APIC RTE.
- */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct irq_desc *desc;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	int modify_ioapic_rte;
-	unsigned int dest;
-	unsigned long flags;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	desc = irq_to_desc(irq);
-	modify_ioapic_rte = desc->status & IRQ_LEVEL;
-	if (modify_ioapic_rte) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-	}
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
-	 */
-	modify_irte(irq, &irte);
-
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc->affinity = mask;
-}
-
-static int migrate_irq_remapped_level(int irq)
-{
-	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	mask_IO_APIC_irq(irq);
-
-	if (io_apic_level_ack_pending(irq)) {
-		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
-		 * vector information in the IO-APIC RTE and that will confuse
-		 * the EOI broadcast performed by cpu.
-		 * So, delay the irq migration to the next instance.
-		 */
-		schedule_delayed_work(&ir_migration_work, 1);
-		goto unmask;
-	}
-
-	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
-
-	ret = 0;
-	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
-
-unmask:
-	unmask_IO_APIC_irq(irq);
-	return ret;
-}
-
-static void ir_irq_migration(struct work_struct *work)
-{
-	unsigned int irq;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		if (desc->status & IRQ_MOVE_PENDING) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&desc->lock, flags);
-			if (!desc->chip->set_affinity ||
-			    !(desc->status & IRQ_MOVE_PENDING)) {
-				desc->status &= ~IRQ_MOVE_PENDING;
-				spin_unlock_irqrestore(&desc->lock, flags);
-				continue;
-			}
-
-			desc->chip->set_affinity(irq, desc->pending_mask);
-			spin_unlock_irqrestore(&desc->lock, flags);
-		}
-	}
-}
-
-/*
- * Migrates the IRQ destination in the process context.
- */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	if (desc->status & IRQ_LEVEL) {
-		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
-		return;
-	}
-
-	migrate_ioapic_irq(irq, mask);
-}
-#endif
-
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-	ack_APIC_irq();
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irq;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
-
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
-			goto unlock;
-
-		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
-unlock:
-		spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void irq_complete_move(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned vector, me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	vector = ~get_irq_regs()->orig_ax;
-	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-}
-#else
-static inline void irq_complete_move(unsigned int irq) {}
-#endif
-#ifdef CONFIG_INTR_REMAP
-static void ack_x2apic_level(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	ack_APIC_irq();
-}
-
-#ifdef CONFIG_X86_64
-static void ack_apic_level(unsigned int irq)
-{
-	int do_unmask_irq = 0;
-
-	irq_complete_move(irq);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
-		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
-	}
-#endif
-
-	/*
-	 * We must acknowledge the irq before we move it or the acknowledge will
-	 * not propagate properly.
-	 */
-	ack_APIC_irq();
-
-	/* Now we can move and renable the irq */
-	if (unlikely(do_unmask_irq)) {
-		/* Only migrate the irq if the ack has been received.
-		 *
-		 * On rare occasions the broadcast level triggered ack gets
-		 * delayed going to ioapics, and if we reprogram the
-		 * vector while Remote IRR is still set the irq will never
-		 * fire again.
-		 *
-		 * To prevent this scenario we read the Remote IRR bit
-		 * of the ioapic.  This has two effects.
-		 * - On any sane system the read of the ioapic will
-		 *   flush writes (and acks) going to the ioapic from
-		 *   this cpu.
-		 * - We get to see if the ACK has actually been delivered.
-		 *
-		 * Based on failed experiments of reprogramming the
-		 * ioapic entry from outside of irq context starting
-		 * with masking the ioapic entry and then polling until
-		 * Remote IRR was clear before reprogramming the
-		 * ioapic I don't trust the Remote IRR bit to be
-		 * completey accurate.
-		 *
-		 * However there appears to be no other way to plug
-		 * this race, so if the Remote IRR bit is not
-		 * accurate and is causing problems then it is a hardware bug
-		 * and you can go talk to the chipset vendor about it.
-		 */
-		if (!io_apic_level_ack_pending(irq))
-			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
-	}
-}
-#else
-atomic_t irq_mis_count;
-static void ack_apic_level(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
-
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
-		spin_unlock(&ioapic_lock);
-	}
-}
-#endif
-
-static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
-#ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-#endif
-
-static inline void init_IO_APIC_traps(void)
-{
-	int irq;
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	for_each_irq_cfg(cfg) {
-		irq = cfg->irq;
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
-			/*
-			 * Hmm.. We don't have an entry for this,
-			 * so default to an old-fashioned 8259
-			 * interrupt if we can..
-			 */
-			if (irq < 16)
-				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
-				/* Strange. Oh, well.. */
-				desc->chip = &no_irq_chip;
-			}
-		}
-	}
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void mask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-	unsigned long v;
-
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
-	ack_APIC_irq();
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-	.name		= "local-APIC",
-	.mask		= mask_lapic_irq,
-	.unmask		= unmask_lapic_irq,
-	.ack		= ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	desc->status &= ~IRQ_LEVEL;
-	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-				      "edge");
-}
-
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-	int apic, pin, i;
-	struct IO_APIC_route_entry entry0, entry1;
-	unsigned char save_control, save_freq_select;
-
-	pin  = find_isa_irq_pin(8, mp_INT);
-	if (pin == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-	apic = find_isa_irq_apic(8, mp_INT);
-	if (apic == -1) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	entry0 = ioapic_read_entry(apic, pin);
-	clear_IO_APIC_pin(apic, pin);
-
-	memset(&entry1, 0, sizeof(entry1));
-
-	entry1.dest_mode = 0;			/* physical delivery */
-	entry1.mask = 0;			/* unmask IRQ now */
-	entry1.dest = hard_smp_processor_id();
-	entry1.delivery_mode = dest_ExtINT;
-	entry1.polarity = entry0.polarity;
-	entry1.trigger = 0;
-	entry1.vector = 0;
-
-	ioapic_write_entry(apic, pin, entry1);
-
-	save_control = CMOS_READ(RTC_CONTROL);
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-		   RTC_FREQ_SELECT);
-	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-	i = 100;
-	while (i-- > 0) {
-		mdelay(10);
-		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-			i -= 10;
-	}
-
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	clear_IO_APIC_pin(apic, pin);
-
-	ioapic_write_entry(apic, pin, entry0);
-}
-
-static int disable_timer_pin_1 __initdata;
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 0;
-}
-early_param("disable_timer_pin_1", disable_timer_pin_setup);
-
-int timer_through_8259 __initdata;
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
- */
-static inline void __init check_timer(void)
-{
-	struct irq_cfg *cfg = irq_cfg(0);
-	int apic1, pin1, apic2, pin2;
-	unsigned long flags;
-	unsigned int ver;
-	int no_pin1 = 0;
-
-	local_irq_save(flags);
-
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
-
-	/*
-	 * get/set the timer IRQ vector:
-	 */
-	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
-
-	/*
-	 * As IRQ0 is to be enabled in the 8259A, the virtual
-	 * wire has to be disabled in the local APIC.  Also
-	 * timer interrupts need to be acknowledged manually in
-	 * the 8259A for the i82489DX when using the NMI
-	 * watchdog as that APIC treats NMIs as level-triggered.
-	 * The AEOI mode will finish them in the 8259A
-	 * automatically.
-	 */
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-	init_8259A(1);
-#ifdef CONFIG_X86_32
-	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-#endif
-
-	pin1  = find_isa_irq_pin(0, mp_INT);
-	apic1 = find_isa_irq_apic(0, mp_INT);
-	pin2  = ioapic_i8259.pin;
-	apic2 = ioapic_i8259.apic;
-
-	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    cfg->vector, apic1, pin1, apic2, pin2);
-
-	/*
-	 * Some BIOS writers are clueless and report the ExtINTA
-	 * I/O APIC input from the cascaded 8259A as the timer
-	 * interrupt input.  So just in case, if only one pin
-	 * was found above, try it both directly and through the
-	 * 8259A.
-	 */
-	if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("BIOS bug: timer not connected to IO-APIC");
-#endif
-		pin1 = pin2;
-		apic1 = apic2;
-		no_pin1 = 1;
-	} else if (pin2 == -1) {
-		pin2 = pin1;
-		apic2 = apic1;
-	}
-
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
-			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-		}
-		unmask_IO_APIC_irq(0);
-		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			goto out;
-		}
-#ifdef CONFIG_INTR_REMAP
-		if (intr_remapping_enabled)
-			panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
-		clear_IO_APIC_pin(apic1, pin1);
-		if (!no_pin1)
-			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-				    "8254 timer not connected to IO-APIC\n");
-
-		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-			    "(IRQ0) through the 8259A ...\n");
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
-		enable_8259A_irq(0);
-		if (timer_irq_works()) {
-			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			goto out;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		disable_8259A_irq(0);
-		clear_IO_APIC_pin(apic2, pin2);
-		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-	}
-
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as Virtual Wire IRQ...\n");
-
-	lapic_register_intr(0);
-	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
-	enable_8259A_irq(0);
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	disable_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as ExtINT IRQ...\n");
-
-	init_8259A(0);
-	make_8259A_irq(0);
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-	unlock_ExtINT_logic();
-
-	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-		goto out;
-	}
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-		"report.  Then try booting with the 'noapic' option.\n");
-out:
-	local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-
-#ifdef CONFIG_X86_32
-	enable_IO_APIC();
-#else
-	/*
-	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
-	 */
-#endif
-
-	io_apic_irqs = ~PIC_IRQS;
-
-	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
-         * Set up IO-APIC IRQ routing.
-         */
-#ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
-#endif
-	sync_Arb_IDs();
-	setup_IO_APIC_irqs();
-	init_IO_APIC_traps();
-	check_timer();
-}
-
-/*
- *      Called after all the initialization is done. If we didnt find any
- *      APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
-
-	return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
-	unsigned long flags;
-	union IO_APIC_reg_00 reg_00;
-	int i;
-
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
-
-	return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
-	.suspend = ioapic_suspend,
-	.resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-	struct sys_device * dev;
-	int i, size, error;
-
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
-
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-	}
-
-	return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-unsigned int create_irq_nr(unsigned int irq_want)
-{
-	/* Allocate an unused irq */
-	unsigned int irq;
-	unsigned int new;
-	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-	irq_want = nr_irqs - 1;
-#endif
-
-	irq = 0;
-	spin_lock_irqsave(&vector_lock, flags);
-	for (new = irq_want; new > 0; new--) {
-		if (platform_legacy_irq(new))
-			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
-			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
-			irq = new;
-		break;
-	}
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (irq > 0) {
-		dynamic_irq_init(irq);
-	}
-	return irq;
-}
-
-int create_irq(void)
-{
-	int irq;
-
-	irq = create_irq_nr(nr_irqs - 1);
-
-	if (irq == 0)
-		irq = -1;
-
-	return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	dynamic_irq_cleanup(irq);
-
-#ifdef CONFIG_INTR_REMAP
-	free_irte(irq);
-#endif
-	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
-	spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (err)
-		return err;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irte irte;
-		int ir_index;
-		u16 sub_handle;
-
-		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
-		BUG_ON(ir_index == -1);
-
-		memset (&irte, 0, sizeof(irte));
-
-		irte.present = 1;
-		irte.dst_mode = INT_DEST_MODE;
-		irte.trigger_mode = 0; /* edge */
-		irte.dlvry_mode = INT_DELIVERY_MODE;
-		irte.vector = cfg->vector;
-		irte.dest_id = IRTE_DEST(dest);
-
-		modify_irte(irq, &irte);
-
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->data = sub_handle;
-		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
-				  MSI_ADDR_IR_SHV |
-				  MSI_ADDR_IR_INDEX1(ir_index) |
-				  MSI_ADDR_IR_INDEX2(ir_index);
-	} else
-#endif
-	{
-		msg->address_hi = MSI_ADDR_BASE_HI;
-		msg->address_lo =
-			MSI_ADDR_BASE_LO |
-			((INT_DEST_MODE == 0) ?
-				MSI_ADDR_DEST_MODE_PHYSICAL:
-				MSI_ADDR_DEST_MODE_LOGICAL) |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_ADDR_REDIRECTION_CPU:
-				MSI_ADDR_REDIRECTION_LOWPRI) |
-			MSI_ADDR_DEST_ID(dest);
-
-		msg->data =
-			MSI_DATA_TRIGGER_EDGE |
-			MSI_DATA_LEVEL_ASSERT |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				MSI_DATA_DELIVERY_FIXED:
-				MSI_DATA_DELIVERY_LOWPRI) |
-			MSI_DATA_VECTOR(cfg->vector);
-	}
-	return err;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	read_msi_msg(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
-	struct irte irte;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (get_irte(irq, &irte))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * atomically update the IRTE with the new destination and vector.
-	 */
-	modify_irte(irq, &irte);
-
-	/*
-	 * After this point, all the interrupts will start arriving
-	 * at the new destination. So, time to cleanup the previous
-	 * vector allocation.
-	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name		= "PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip msi_ir_chip = {
-	.name		= "IR-PCI-MSI",
-	.unmask		= unmask_msi_irq,
-	.mask		= mask_msi_irq,
-	.ack		= ack_x2apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= ir_set_msi_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
-	struct intel_iommu *iommu;
-	int index;
-
-	iommu = map_dev_to_ir(dev);
-	if (!iommu) {
-		printk(KERN_ERR
-		       "Unable to map PCI %s to iommu\n", pci_name(dev));
-		return -ENOENT;
-	}
-
-	index = alloc_irte(iommu, irq, nvec);
-	if (index < 0) {
-		printk(KERN_ERR
-		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
-		return -ENOSPC;
-	}
-	return index;
-}
-#endif
-
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(dev, irq, &msg);
-	if (ret < 0)
-		return ret;
-
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
-
-#ifdef CONFIG_INTR_REMAP
-	if (irq_remapped(irq)) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		/*
-		 * irq migration in process context
-		 */
-		desc->status |= IRQ_MOVE_PCNTXT;
-		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
-	} else
-#endif
-		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
-
-	return 0;
-}
-
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-	unsigned int irq;
-	int ret;
-	unsigned int irq_want;
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
-	irq = create_irq_nr(irq_want);
-	if (irq == 0)
-		return -1;
-
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled)
-		goto no_ir;
-
-	ret = msi_alloc_irte(dev, irq, 1);
-	if (ret < 0)
-		goto error;
-no_ir:
-#endif
-	ret = setup_msi_irq(dev, desc, irq);
-	if (ret < 0) {
-		destroy_irq(irq);
-		return ret;
-	}
-	return 0;
-
-#ifdef CONFIG_INTR_REMAP
-error:
-	destroy_irq(irq);
-	return ret;
-#endif
-}
-
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	unsigned int irq;
-	int ret, sub_handle;
-	struct msi_desc *desc;
-	unsigned int irq_want;
-
-#ifdef CONFIG_INTR_REMAP
-	struct intel_iommu *iommu = 0;
-	int index = 0;
-#endif
-
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
-		if (irq == 0)
-			return -1;
-#ifdef CONFIG_INTR_REMAP
-		if (!intr_remapping_enabled)
-			goto no_ir;
-
-		if (!sub_handle) {
-			/*
-			 * allocate the consecutive block of IRTE's
-			 * for 'nvec'
-			 */
-			index = msi_alloc_irte(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			iommu = map_dev_to_ir(dev);
-			if (!iommu) {
-				ret = -ENOENT;
-				goto error;
-			}
-			/*
-			 * setup the mapping between the irq and the IRTE
-			 * base index, the sub_handle pointing to the
-			 * appropriate interrupt remap table entry.
-			 */
-			set_irte_irq(irq, iommu, index, sub_handle);
-		}
-no_ir:
-#endif
-		ret = setup_msi_irq(dev, desc, irq);
-		if (ret < 0)
-			goto error;
-		sub_handle++;
-	}
-	return 0;
-
-error:
-	destroy_irq(irq);
-	return ret;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-	destroy_irq(irq);
-}
-
-#ifdef CONFIG_DMAR
-#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	struct msi_msg msg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-struct irq_chip dmar_msi_type = {
-	.name = "DMAR_MSI",
-	.unmask = dmar_msi_unmask,
-	.mask = dmar_msi_mask,
-	.ack = ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity = dmar_msi_set_affinity,
-#endif
-	.retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-		"edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int dest;
-	cpumask_t tmp;
-	struct irq_desc *desc;
-
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
-	if (assign_irq_vector(irq, mask))
-		return;
-
-	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
-	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
-	desc->affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
-	.name		= "PCI-HT",
-	.mask		= mask_ht_irq,
-	.unmask		= unmask_ht_irq,
-	.ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
-	.set_affinity	= set_ht_irq_affinity,
-#endif
-	.retrigger	= ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	int err;
-	cpumask_t tmp;
-
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		struct ht_irq_msg msg;
-		unsigned dest;
-
-		cfg = irq_cfg(irq);
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
-
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(cfg->vector) |
-			((INT_DEST_MODE == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((INT_DELIVERY_MODE != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
-
-		write_ht_irq_msg(irq, &msg);
-
-		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
-	}
-	return err;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-	union IO_APIC_reg_00 reg_00;
-	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-	physid_mask_t tmp;
-	unsigned long flags;
-	int i = 0;
-
-	/*
-	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
-	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
-	 * supports up to 16 on one shared APIC bus.
-	 *
-	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-	 *      advantage of new APIC bus architecture.
-	 */
-
-	if (physids_empty(apic_id_map))
-		apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	if (apic_id >= get_physical_broadcast()) {
-		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-			"%d\n", ioapic, apic_id, reg_00.bits.ID);
-		apic_id = reg_00.bits.ID;
-	}
-
-	/*
-	 * Every APIC in a system must have a unique ID or we get lots of nice
-	 * 'stuck on smp_invalidate_needed IPI wait' messages.
-	 */
-	if (check_apicid_used(apic_id_map, apic_id)) {
-
-		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!check_apicid_used(apic_id_map, i))
-				break;
-		}
-
-		if (i == get_physical_broadcast())
-			panic("Max apic_id exceeded!\n");
-
-		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-			"trying %d\n", ioapic, apic_id, i);
-
-		apic_id = i;
-	}
-
-	tmp = apicid_to_cpu_present(apic_id);
-	physids_or(apic_id_map, apic_id_map, tmp);
-
-	if (reg_00.bits.ID != apic_id) {
-		reg_00.bits.ID = apic_id;
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/* Sanity check */
-		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-			return -1;
-		}
-	}
-
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-	return apic_id;
-}
-
-int __init io_apic_get_version(int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.version;
-}
-#endif
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
-
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
-
-	return 0;
-}
-
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-	int i;
-
-	if (skip_ioapic_setup)
-		return -1;
-
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == mp_INT &&
-		    mp_irqs[i].mp_srcbusirq == bus_irq)
-			break;
-	if (i >= mp_irq_entries)
-		return -1;
-
-	*trigger = irq_trigger(i);
-	*polarity = irq_polarity(i);
-	return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-	struct irq_cfg *cfg;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			cfg = irq_cfg(irq);
-			if (!cfg->vector)
-				setup_IO_APIC_irq(ioapic, pin, irq,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-#ifdef CONFIG_INTR_REMAP
-			else if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
-			else
-				set_ioapic_affinity_irq(irq, TARGET_CPUS);
-		}
-
-	}
-}
-#endif
-
-#ifdef CONFIG_X86_64
-#define IOAPIC_RESOURCE_NAME_SIZE 11
-
-static struct resource *ioapic_resources;
-
-static struct resource * __init ioapic_setup_resources(void)
-{
-	unsigned long n;
-	struct resource *res;
-	char *mem;
-	int i;
-
-	if (nr_ioapics <= 0)
-		return NULL;
-
-	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
-	n *= nr_ioapics;
-
-	mem = alloc_bootmem(n);
-	res = (void *)mem;
-
-	if (mem != NULL) {
-		mem += sizeof(struct resource) * nr_ioapics;
-
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
-	}
-
-	ioapic_resources = res;
-
-	return res;
-}
-#endif
-
-void __init ioapic_init_mappings(void)
-{
-	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
-#ifdef CONFIG_X86_64
-	struct resource *ioapic_res;
-
-	ioapic_res = ioapic_setup_resources();
-#endif
-	for (i = 0; i < nr_ioapics; i++) {
-		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mp_apicaddr;
-#ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
-#endif
-		} else {
-#ifdef CONFIG_X86_32
-fake_ioapic_page:
-#endif
-			ioapic_phys = (unsigned long)
-				alloc_bootmem_pages(PAGE_SIZE);
-			ioapic_phys = __pa(ioapic_phys);
-		}
-		set_fixmap_nocache(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %08lx (%08lx)\n",
-			    __fix_to_virt(idx), ioapic_phys);
-		idx++;
-
-#ifdef CONFIG_X86_64
-		if (ioapic_res != NULL) {
-			ioapic_res->start = ioapic_phys;
-			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-			ioapic_res++;
-		}
-#endif
-	}
-}
-
-#ifdef CONFIG_X86_64
-static int __init ioapic_insert_resources(void)
-{
-	int i;
-	struct resource *r = ioapic_resources;
-
-	if (!r) {
-		printk(KERN_ERR
-		       "IO APIC resources could be not be allocated.\n");
-		return -1;
-	}
-
-	for (i = 0; i < nr_ioapics; i++) {
-		insert_resource(&iomem_resource, r);
-		r++;
-	}
-
-	return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
-- 
cgit v1.2.3-55-g7522


From c691cc84529ec88ccb32b174535bb61875888c90 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:43 -0700
Subject: io_apic: make 32 bit have io_apic resource in /proc/iomem

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index fba6d6ee3480..7e303e0967a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3802,7 +3802,6 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
-#ifdef CONFIG_X86_64
 #define IOAPIC_RESOURCE_NAME_SIZE 11
 
 static struct resource *ioapic_resources;
@@ -3838,17 +3837,14 @@ static struct resource * __init ioapic_setup_resources(void)
 
 	return res;
 }
-#endif
 
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	int i;
-#ifdef CONFIG_X86_64
 	struct resource *ioapic_res;
 
 	ioapic_res = ioapic_setup_resources();
-#endif
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
@@ -3877,17 +3873,14 @@ fake_ioapic_page:
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
-#ifdef CONFIG_X86_64
 		if (ioapic_res != NULL) {
 			ioapic_res->start = ioapic_phys;
 			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
 			ioapic_res++;
 		}
-#endif
 	}
 }
 
-#ifdef CONFIG_X86_64
 static int __init ioapic_insert_resources(void)
 {
 	int i;
@@ -3910,4 +3903,3 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occured to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-#endif
-- 
cgit v1.2.3-55-g7522


From 42379b1122bab7f9aefdbd4b7004a6fa89dfbae5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:45 -0700
Subject: pci: change msi-x vector to 32bit

we are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the
cache for irq number should be 32 bit too.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/scsi/qla2xxx/qla_def.h | 2 +-
 include/linux/pci.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c819216771..f25f41a499e5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
 	int have_irq;
-	uint16_t msix_vector;
+	uint32_t msix_vector;
 	uint16_t msix_entry;
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 98dc6243a706..1f8db240ca48 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -723,7 +723,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-	u16 	vector;	/* kernel uses to write allocated vector */
+	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
 };
 
-- 
cgit v1.2.3-55-g7522


From bcd562607f17b0c9f9ae96af849894dd06645f63 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:46 -0700
Subject: x86: irq: interrupt array size should be NR_VECTORS

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/hw_irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 51c787d17cbc..39c7a4745d25 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -115,7 +115,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_IRQS])(void);
+extern void (*const interrupt[NR_VECTORS])(void);
 #endif
 
 typedef int vector_irq_t[NR_VECTORS];
-- 
cgit v1.2.3-55-g7522


From 4e738e2f307113feaedebae147c3e0d072e39648 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:47 -0700
Subject: x86: unify mask_IO_APIC_irq

use MACRO for 32 bit too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 80 +++++++++++++----------------------------------
 1 file changed, 21 insertions(+), 59 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7e303e0967a4..099696109ca8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -610,18 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
-	readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)					\
+#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
 									\
 {									\
 	int pin;							\
@@ -636,7 +625,8 @@ static inline void io_apic_sync(unsigned int apic)
 			break;						\
 		pin = entry->pin;					\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION;						\
+		reg ACTION_DISABLE;					\
+		reg ACTION_ENABLE;					\
 		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
 		FINAL;							\
 		if (!entry->next)					\
@@ -645,66 +635,38 @@ static inline void io_apic_sync(unsigned int apic)
 	}								\
 }
 
-#define DO_ACTION(name,R,ACTION, FINAL)					\
+#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
 									\
 	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+	__DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)
 
 /* mask = 0 */
-DO_ACTION(__unmask,	0, &= ~IO_APIC_REDIR_MASKED, )
+DO_ACTION(__unmask,	0, |= 0, &= ~IO_APIC_REDIR_MASKED, )
 
-#else
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
+#ifdef CONFIG_X86_64
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
 {
-	struct irq_cfg *cfg;
-	struct irq_pin_list *entry;
-	unsigned int pin, reg;
-
-	cfg = irq_cfg(irq);
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		if (!entry)
-			break;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		reg &= ~disable;
-		reg |= enable;
-		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
 }
 
 /* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic))
 
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
+#else
+
+/* mask = 1 */
+DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, )
 
 /* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-				IO_APIC_REDIR_LEVEL_TRIGGER);
-}
+DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, )
 
 /* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-	__modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-				IO_APIC_REDIR_MASKED);
-}
+DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, )
 
 #endif
 
-- 
cgit v1.2.3-55-g7522


From 3eb2cce84beae8fd41de950569cafd5bca7edd5d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:48 -0700
Subject: x86: unify ack_apic_edge

use code in 64 to replace
	move_native_irq(irq, desc);
in 32 bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 73 +++++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 099696109ca8..a466b04ad5a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -389,7 +389,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-#ifdef CONFIG_X86_64
 static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
@@ -419,7 +418,6 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 
 	return false;
 }
-#endif
 
 union entry_union {
 	struct { u32 w1, w2; };
@@ -2398,9 +2396,16 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_32
+atomic_t irq_mis_count;
+#endif
+
 static void ack_apic_level(unsigned int irq)
 {
+#ifdef CONFIG_X86_32
+	unsigned long v;
+	int i;
+#endif
 	int do_unmask_irq = 0;
 
 	irq_complete_move(irq);
@@ -2412,6 +2417,31 @@ static void ack_apic_level(unsigned int irq)
 	}
 #endif
 
+#ifdef CONFIG_X86_32
+	/*
+	* It appears there is an erratum which affects at least version 0x11
+	* of I/O APIC (that's the 82093AA and cores integrated into various
+	* chipsets).  Under certain conditions a level-triggered interrupt is
+	* erroneously delivered as edge-triggered one but the respective IRR
+	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	* message but it will never arrive and further interrupts are blocked
+	* from the source.  The exact reason is so far unknown, but the
+	* phenomenon was observed when two consecutive interrupt requests
+	* from a given source get delivered to the same CPU and the source is
+	* temporarily disabled in between.
+	*
+	* A workaround is to simulate an EOI message manually.  We achieve it
+	* by setting the trigger mode to edge and then to level when the edge
+	* trigger mode gets detected in the TMR of a local APIC for a
+	* level-triggered interrupt.  We mask the source for the time of the
+	* operation to prevent an edge-triggered interrupt escaping meanwhile.
+	* The idea is from Manfred Spraul.  --macro
+	*/
+	i = irq_cfg(irq)->vector;
+
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
 	 * not propagate properly.
@@ -2450,41 +2480,8 @@ static void ack_apic_level(unsigned int irq)
 			move_masked_irq(irq);
 		unmask_IO_APIC_irq(irq);
 	}
-}
-#else
-atomic_t irq_mis_count;
-static void ack_apic_level(unsigned int irq)
-{
-	unsigned long v;
-	int i;
-
-	irq_complete_move(irq);
-	move_native_irq(irq);
-	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
-	i = irq_cfg(irq)->vector;
-
-	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-	ack_APIC_irq();
 
+#ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
@@ -2492,8 +2489,8 @@ static void ack_apic_level(unsigned int irq)
 		__unmask_and_level_IO_APIC_irq(irq);
 		spin_unlock(&ioapic_lock);
 	}
-}
 #endif
+}
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name 		= "IO-APIC",
-- 
cgit v1.2.3-55-g7522


From 29ccbbf232c035b8c7ff0c5060fbe30a66ed9b99 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:49 -0700
Subject: x86: remove first_free_entry/pin_map_size

no user now

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 7 -------
 arch/x86/kernel/setup.c   | 4 ----
 include/asm-x86/irq.h     | 3 ---
 3 files changed, 14 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a466b04ad5a4..5de2d38812aa 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -72,13 +72,6 @@ int sis_apic_bug = -1;
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
 /*
  * # of IRQ routing registers
  */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 02e3a6697977..d90c659b70e9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1074,10 +1074,6 @@ void __init setup_arch(char **cmdline_p)
 		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
-#ifdef CONFIG_X86_IO_APIC
-	pin_map_size = nr_irqs * 2;
-	first_free_entry = nr_irqs;
-#endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 2a130c44f5de..1e5f2909c1db 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -10,9 +10,6 @@
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
 
-extern int pin_map_size;
-extern int first_free_entry;
-
 static inline int irq_canonicalize(int irq)
 {
 	return ((irq == 2) ? 9 : irq);
-- 
cgit v1.2.3-55-g7522


From ffd5aae7817fba22c5c3e304a31c44fa0a4e9a97 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:50 -0700
Subject: x86: print local APIC of APs one by one

instead of print that of all APs at the time

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 5de2d38812aa..bf0e66d73030 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1777,7 +1777,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 
 __apicdebuginit(void) print_all_local_APICs(void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1);
+	int cpu;
+
+	preempt_disable();
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	preempt_enable();
 }
 
 __apicdebuginit(void) print_PIC(void)
-- 
cgit v1.2.3-55-g7522


From 8f09cd20a24c5f13c571bc73ddcd47be0af3b70f Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:51 -0700
Subject: x86: make HAVE_SPARSE_IRQ support selectable

Ingo said sparse_irq is some intrusive. need to make it selectable

to make it simple, remove irq_desc as parameter in some functions.
(ack, eoi, set_affinity).
may need to make member if irq_chip to take irq_desc, or struct irq later.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig              |  3 --
 arch/x86/Kconfig          | 12 +++++++-
 arch/x86/kernel/io_apic.c | 71 ++++++++++++++++++++++++++++++++---------------
 arch/x86/kernel/irq_32.c  |  2 +-
 arch/x86/kernel/irq_64.c  |  2 +-
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index b36762246265..c8a7c2eb6490 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -106,6 +106,3 @@ config HAVE_CLK
 config HAVE_DYN_ARRAY
 	def_bool n
 
-config HAVE_SPARSE_IRQ
-	def_bool n
-
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b157e94637bf..600584b7a497 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,7 +34,6 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_DYN_ARRAY
-	select HAVE_SPARSE_IRQ
 
 config ARCH_DEFCONFIG
 	string
@@ -238,6 +237,17 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config HAVE_SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	default y
+	help
+	  This enables support for sparse irq, esp for msi/msi-x. the irq
+	  number will be bus/dev/fn + 12bit. You may need if you have lots of
+	  cards supports msi-x installed.
+
+	  If you don't know what to do here, say Y.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index bf0e66d73030..f853b667fa5c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -107,7 +107,9 @@ struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	struct irq_cfg *next;
+#endif
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -137,20 +139,6 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = {
 };
 
 static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
 
 static void init_one_irq_cfg(struct irq_cfg *cfg)
 {
@@ -158,7 +146,9 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 }
 
 static struct irq_cfg *irq_cfgx;
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
+#endif
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -174,15 +164,34 @@ static void __init init_work(void *data)
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 	for (i = 1; i < *da->nr; i++)
 		cfg[i-1].next = &cfg[i];
 
 	irq_cfgx_free = &irq_cfgx[legacy_count];
 	irq_cfgx[legacy_count - 1].next = NULL;
+#endif
+}
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+	if (arg) {
+		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+		if (nr_irq_cfg < 32)
+			nr_irq_cfg = 32;
+	}
+	return 0;
 }
 
-#define for_each_irq_cfg(cfg)		\
-	for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+#define for_each_irq_cfg(irqX, cfg)           \
+        for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U)
+
 
 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
@@ -273,7 +282,26 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 #endif
 	return cfg;
 }
+#else
+
+#define for_each_irq_cfg(irq, cfg)		\
+	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
 
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+        if (irq < nr_irqs)
+                return &irq_cfgx[irq];
+
+        return NULL;
+}
+struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+        return irq_cfg(irq);
+}
+
+#endif
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -1282,11 +1310,10 @@ void __setup_vector_irq(int cpu)
 	struct irq_cfg *cfg;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(cfg) {
+	for_each_irq_cfg(irq, cfg) {
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
-		irq = cfg->irq;
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	}
 	/* Mark the free vectors */
@@ -1563,6 +1590,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1651,11 +1679,11 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(cfg) {
+	for_each_irq_cfg(irq, cfg) {
 		struct irq_pin_list *entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-		printk(KERN_DEBUG "IRQ%d ", cfg->irq);
+		printk(KERN_DEBUG "IRQ%d ", irq);
 		for (;;) {
 			printk("-> %d:%d", entry->apic, entry->pin);
 			if (!entry->next)
@@ -2535,8 +2563,7 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(cfg) {
-		irq = cfg->irq;
+	for_each_irq_cfg(irq, cfg) {
 		if (IO_APIC_IRQ(irq) && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index cc929f2f84f1..b2e1082cf5ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -269,7 +269,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irqaction * action;
 	unsigned long flags;
 	unsigned int entries;
-	struct irq_desc *desc;
+	struct irq_desc *desc = NULL;
 	int tail = 0;
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 348a11168c2b..c2fca89a3f7e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irqaction * action;
 	unsigned long flags;
 	unsigned int entries;
-	struct irq_desc *desc;
+	struct irq_desc *desc = NULL;
 	int tail = 0;
 
 #ifdef CONFIG_HAVE_SPARSE_IRQ
-- 
cgit v1.2.3-55-g7522


From 9d6a4d0823b3b8e29156f5e698b5a68687afad32 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:52 -0700
Subject: x86: probe nr_irqs even only mptable is used

for !CONFIG_HAVE_SPARSE_IRQ

fix:

 In file included from arch/x86/kernel/early-quirks.c:18:
 include/asm/io_apic.h: In function 'probe_nr_irqs':
 include/asm/io_apic.h:209: error: 'NR_IRQS' undeclared (first use in this function)
 include/asm/io_apic.h:209: error: (Each undeclared identifier is reported only once
 include/asm/io_apic.h:209: error: for each function it appears in.)

v2: fix by Ingo

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 25 -------------------------
 arch/x86/kernel/io_apic.c   | 43 ++++++++++++++++++++++++++++++-------------
 arch/x86/kernel/setup.c     |  6 +++---
 include/asm-x86/io_apic.h   |  8 ++++++++
 include/asm-x86/mpspec.h    |  1 -
 5 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3e9d163fd92f..5fef4fece4a5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -957,29 +957,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
-int get_nr_irqs_via_madt(void)
-{
-	int idx;
-	int nr = 0;
-
-	for (idx = 0; idx < nr_ioapics; idx++) {
-		if (mp_ioapic_routing[idx].gsi_end > nr)
-			nr = mp_ioapic_routing[idx].gsi_end;
-	}
-
-	nr++;
-
-	/* double it for hotplug and msi and nmi */
-	nr <<= 1;
-
-	/* something wrong ? */
-	if (nr < 32)
-		nr = 32;
-
-	return nr;
-
-}
-
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
@@ -1278,8 +1255,6 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	}
 
 
-	nr_irqs = get_nr_irqs_via_madt();
-
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
 				  nr_irqs);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f853b667fa5c..f7e80262cbbb 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3596,6 +3596,36 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
+int __init io_apic_get_redir_entries (int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.entries;
+}
+
+int __init probe_nr_irqs(void)
+{
+	int idx;
+	int nr = 0;
+
+	for (idx = 0; idx < nr_ioapics; idx++)
+		nr += io_apic_get_redir_entries(idx);
+
+	/* double it for hotplug and msi and nmi */
+	nr <<= 1;
+
+	/* something wrong ? */
+	if (nr < 32)
+		nr = 32;
+
+	return nr;
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3690,19 +3720,6 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-int __init io_apic_get_redir_entries (int ioapic)
-{
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_01.raw = io_apic_read(ioapic, 1);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return reg_01.bits.entries;
-}
-
-
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
 	if (!IO_APIC_IRQ(irq)) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d90c659b70e9..61335306696a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1069,15 +1069,15 @@ void __init setup_arch(char **cmdline_p)
 	prefill_possible_map();
 
 #ifdef CONFIG_X86_64
-	/* need to wait for nr_cpu_ids settle down */
-	if (nr_irqs == NR_IRQS)
-		nr_irqs = 32 * nr_cpu_ids + 224;
 	init_cpu_to_node();
 #endif
 
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	/* need to wait for io_apic is mapped */
+	nr_irqs = probe_nr_irqs();
+
 	kvm_guest_init();
 
 	e820_reserve_resources();
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index ce818292d2c7..d35cbd7aa587 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
@@ -187,10 +188,17 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
+extern int probe_nr_irqs(void);
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void) { }
+
+static inline int probe_nr_irqs(void)
+{
+	return NR_IRQS;
+}
 #endif
 
 #endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index a0748021250b..be2241a818f1 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -60,7 +60,6 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
-extern int get_nr_irqs_via_madt(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
-- 
cgit v1.2.3-55-g7522


From 2b46b37de73296018da02c2a421ac2a9cbccfa9f Mon Sep 17 00:00:00 2001
From: Alex Nixon
Date: Tue, 19 Aug 2008 20:50:53 -0700
Subject: xen: fix memory access violation bug when CONFIG_HAVE_SPARSE_IRQ is
 enabled

When sparse IRQs are enabled, it is not safe to assume an IRQ descriptor
exists for every possible IRQ.  This patch causes init_evtchn_cpu_bindings
to skip initialisation of IRQ descriptors which don't exist.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/events.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 56ace47f24d6..e6d47e8ca1ac 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -141,6 +141,8 @@ static void init_evtchn_cpu_bindings(void)
 	/* By default all event channels notify CPU#0. */
 	for (i = 0; i < nr_irqs; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
+		if (!desc)
+			continue;
 		desc->affinity = cpumask_of_cpu(0);
 	}
 #endif
-- 
cgit v1.2.3-55-g7522


From e7f5ed8d6e921c6200ce5e2549a86db629f2dd11 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 19 Aug 2008 20:50:54 -0700
Subject: dyn_array: split dyn_array functions from init/main.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Makefile    |   2 +-
 init/dyn_array.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 init/main.c      | 118 ----------------------------------------------------
 3 files changed, 125 insertions(+), 119 deletions(-)
 create mode 100644 init/dyn_array.c

diff --git a/init/Makefile b/init/Makefile
index 4a243df426f7..dc5eeca6eb6d 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y                          := main.o version.o mounts.o
+obj-y                          := main.o dyn_array.o version.o mounts.o
 ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
 else
diff --git a/init/dyn_array.c b/init/dyn_array.c
new file mode 100644
index 000000000000..c4cd902a1180
--- /dev/null
+++ b/init/dyn_array.c
@@ -0,0 +1,124 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+
+void __init pre_alloc_dyn_array(void)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long total_size = 0, size, phys;
+	unsigned long max_align = 1;
+	struct dyn_array **daa;
+	char *ptr;
+
+	/* get the total size at first */
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size)
+		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
+			 total_size);
+	else
+		return;
+
+	/* allocate them all together */
+	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
+	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
+	if (!ptr)
+		panic("Can not alloc dyn_alloc\n");
+
+	phys = virt_to_phys(ptr);
+	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		*da->name = phys_to_virt(phys);
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		phys += size;
+
+		if (da->init_work)
+			da->init_work(da);
+	}
+#else
+#ifdef CONFIF_GENERIC_HARDIRQS
+	unsigned int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		irq_desc[i].irq = i;
+#endif
+#endif
+}
+
+unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
+{
+	unsigned long total_size = 0;
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size;
+	struct dyn_array **daa;
+	unsigned max_align = 1;
+
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
+		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
+			da->size, *da->nr, da->align);
+		total_size += roundup(size, da->align);
+		if (da->align > max_align)
+			max_align = da->align;
+	}
+	if (total_size) {
+		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
+			 total_size);
+		*align = max_align;
+	}
+#endif
+	return total_size;
+}
+
+void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
+{
+#ifdef CONFIG_HAVE_DYN_ARRAY
+	unsigned long size, phys;
+	struct dyn_array **daa;
+	unsigned long addr;
+	void **array;
+
+	phys = virt_to_phys(ptr);
+	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
+		struct dyn_array *da = *daa;
+
+		size = da->size * (*da->nr);
+		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
+
+		phys = roundup(phys, da->align);
+		addr = (unsigned long)da->name;
+		addr += per_cpu_offset(cpu);
+		array = (void **)addr;
+		*array = phys_to_virt(phys);
+		*da->name = *array; /* so init_work could use it directly */
+		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
+
+		phys += size;
+
+		if (da->init_work) {
+			da->init_work(da);
+		}
+	}
+#endif
+}
diff --git a/init/main.c b/init/main.c
index 0d2e60144f83..e81cf427d9c7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -542,124 +542,6 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-void pre_alloc_dyn_array(void)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long total_size = 0, size, phys;
-	unsigned long max_align = 1;
-	struct dyn_array **daa;
-	char *ptr;
-
-	/* get the total size at first */
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size)
-		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
-			 total_size);
-	else
-		return;
-
-	/* allocate them all together */
-	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
-	if (!ptr)
-		panic("Can not alloc dyn_alloc\n");
-
-	phys = virt_to_phys(ptr);
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-
-		phys = roundup(phys, da->align);
-		*da->name = phys_to_virt(phys);
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#else
-#ifdef CONFIF_GENERIC_HARDIRQS
-	unsigned int i;
-
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].irq = i;
-#endif
-#endif
-}
-
-unsigned long per_cpu_dyn_array_size(unsigned long *align)
-{
-	unsigned long total_size = 0;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size;
-	struct dyn_array **daa;
-	unsigned max_align = 1;
-
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size) {
-		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
-			 total_size);
-		*align = max_align;
-	}
-#endif
-	return total_size;
-}
-
-void per_cpu_alloc_dyn_array(int cpu, char *ptr)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys;
-	struct dyn_array **daa;
-	unsigned long addr;
-	void **array;
-
-	phys = virt_to_phys(ptr);
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-
-		phys = roundup(phys, da->align);
-		addr = (unsigned long)da->name;
-		addr += per_cpu_offset(cpu);
-		array = (void **)addr;
-		*array = phys_to_virt(phys);
-		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
-
-		phys += size;
-
-		if (da->init_work) {
-			da->init_work(da);
-		}
-	}
-#endif
-}
-
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
-- 
cgit v1.2.3-55-g7522


From bf9d3cf73e8caf0b3ae0b7f508a9f251536f5ff4 Mon Sep 17 00:00:00 2001
From: Alex Nixon
Date: Mon, 18 Aug 2008 22:17:08 -0700
Subject: xen: Fix bug `do_IRQ: cannot handle IRQ -1 vector 0x6 cpu 1'

Following commit 9c3f2468d8339866d9ef6a25aae31a8909c6be0d, do_IRQ()
looks up the IRQ number in the per-cpu variable vector_irq.

This commit makes Xen initialise an identity vector_irq map for both X86_32 and X86_64.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/irq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 28b85ab8422e..bb042608c602 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
 
 static void __init __xen_init_IRQ(void)
 {
-#ifdef CONFIG_X86_64
 	int i;
 
 	/* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
 		for_each_possible_cpu(cpu)
 			per_cpu(vector_irq, cpu)[i] = i;
 	}
-#endif	/* CONFIG_X86_64 */
 
 	xen_init_IRQ();
 }
-- 
cgit v1.2.3-55-g7522


From 0c425cec64eb0c0d0dd7037c21a25585cbe3636c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 18 Aug 2008 13:04:26 +0200
Subject: warning: fix arch x86 kernel io_apic c

fix warning:

  arch/x86/kernel/io_apic.c: In function ‘print_local_APIC’:
  arch/x86/kernel/io_apic.c:1786: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’
  arch/x86/kernel/io_apic.c:1787: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’

By creating uniform behavior on 32-bit and 64-bit and printing out the ICR
value in two 32-bit words.

Code has changed:

   text	   data	    bss	    dec	    hex	filename
  22901	  19650	  17040	  59591	   e8c7	io_apic.o.before
  22899	  19650	  17040	  59589	   e8c5	io_apic.o.after

Due to the 32-bit cast narrowing the printed out value on 64-bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f7e80262cbbb..34c74cf5c244 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1774,8 +1774,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	}
 
 	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-- 
cgit v1.2.3-55-g7522


From 7ddfb650c7ef7a33a5ef11c0fdf5b3d837a47dba Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Wed, 20 Aug 2008 17:22:51 -0700
Subject: sparseirq: fix intr-remap with dyn_array/nr_irqs changes]

In irq_2_iommu_alloc() and set_irte_irq(), irq_to_desc or
irq_2_iommu pointers may not be allocated. So use the routines
which will allocate them if they are not already allocated.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 23372c811159..2dcf973890c4 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -76,9 +76,10 @@ static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
 
-	desc = irq_to_desc(irq);
-
-	BUG_ON(!desc);
+	/*
+	 * alloc irq desc if not allocated already.
+	 */
+	desc = irq_to_desc_alloc(irq);
 
 	irq_iommu = desc->irq_2_iommu;
 
@@ -255,11 +256,8 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 	struct irq_2_iommu *irq_iommu;
 
 	spin_lock(&irq_2_ir_lock);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
-		return -1;
-	}
+
+	irq_iommu = irq_2_iommu_alloc(irq);
 
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
-- 
cgit v1.2.3-55-g7522


From e89eb43863c2d9f11a3bbe766766fe646e6c50d9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 20 Aug 2008 20:46:25 -0700
Subject: x86: sparse_irq needs spin_lock in allocations

Suresh Siddha noticed that we should have a spinlock around it.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 13 ++++++++++++-
 kernel/irq/handle.c       | 13 ++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 34c74cf5c244..7ca556690474 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -146,6 +146,12 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 }
 
 static struct irq_cfg *irq_cfgx;
+
+/*
+ * Protect the irq_cfgx_free freelist:
+ */
+static DEFINE_SPINLOCK(irq_cfg_lock);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
 #endif
@@ -213,8 +219,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
 	struct irq_cfg *cfg, *cfg_pri;
-	int i;
+	unsigned long flags;
 	int count = 0;
+	int i;
 
 	cfg_pri = cfg = irq_cfgx;
 	while (cfg) {
@@ -226,6 +233,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 		count++;
 	}
 
+	spin_lock_irqsave(&irq_cfg_lock, flags);
 	if (!irq_cfgx_free) {
 		unsigned long phys;
 		unsigned long total_bytes;
@@ -263,6 +271,9 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 	else
 		irq_cfgx = cfg;
 	cfg->irq = irq;
+
+	spin_unlock_irqrestore(&irq_cfg_lock, flags);
+
 	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 24c83a3cee4d..d638a911cbc1 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -107,6 +107,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+/*
+ * Protect the sparse_irqs_free freelist:
+ */
+static DEFINE_SPINLOCK(sparse_irq_lock);
+
 #ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_desc *sparse_irqs_free;
 struct irq_desc *sparse_irqs;
@@ -166,11 +171,13 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	}
 	return NULL;
 }
+
 struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 {
 	struct irq_desc *desc, *desc_pri;
-	int i;
+	unsigned long flags;
 	int count = 0;
+	int i;
 
 	desc_pri = desc = sparse_irqs;
 	while (desc) {
@@ -182,6 +189,7 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 		count++;
 	}
 
+	spin_lock_irqsave(&sparse_irq_lock, flags);
 	/*
 	 *  we run out of pre-allocate ones, allocate more
 	 */
@@ -223,6 +231,9 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	else
 		sparse_irqs = desc;
 	desc->irq = irq;
+
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
 	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
 	{
-- 
cgit v1.2.3-55-g7522


From a2d332fa3445160519de03c350a59602ac1c3df9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Thu, 21 Aug 2008 12:56:32 -0700
Subject: x86: fix 32-bit ioapic lockup with sparseirqs

Missed two lines when copying.

Fix panic on one of Ingo's machines that need to adjust ioapic id when
acpi off/ 32bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7ca556690474..4e44fd1f466e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2077,6 +2077,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 
 		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0, reg_00.raw);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
-- 
cgit v1.2.3-55-g7522


From 052c0bff9b83a578654dfa513d6e3d0b3795f1e8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Thu, 21 Aug 2008 13:10:09 -0700
Subject: x86: fix probe_nr_irqs for xen

otherwise Xen is _completely_ unusable with 5 or more VCPUs.
(when !CONFIG_HAVE_SPARSE_IRQ).

based on Alex Nixon's patch.

also add +1 offset after redir_entries

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Alex Nixon <alex.nixon@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4e44fd1f466e..d28128e0392c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3625,16 +3625,21 @@ int __init probe_nr_irqs(void)
 {
 	int idx;
 	int nr = 0;
+#ifndef CONFIG_XEN
+	int nr_min = 32;
+#else
+	int nr_min = NR_IRQS;
+#endif
 
 	for (idx = 0; idx < nr_ioapics; idx++)
-		nr += io_apic_get_redir_entries(idx);
+		nr += io_apic_get_redir_entries(idx) + 1;
 
 	/* double it for hotplug and msi and nmi */
 	nr <<= 1;
 
 	/* something wrong ? */
-	if (nr < 32)
-		nr = 32;
+	if (nr < nr_min)
+		nr = nr_min;
 
 	return nr;
 }
-- 
cgit v1.2.3-55-g7522


From 2699574b3c04351f5a23c8a842e17c303d7ebb6f Mon Sep 17 00:00:00 2001
From: Alok Kataria
Date: Thu, 21 Aug 2008 11:26:43 -0700
Subject: x86: VMI, initialize IRQ vector

Initialize vector_irq for the vmi used vector, to point to correct irq.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vmiclock_32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe289..254ee07f8635 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
 
 void __init vmi_time_init(void)
 {
+	unsigned int cpu;
 	/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
 	outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
 
 	vmi_time_init_clockevent();
 	setup_irq(0, &vmi_clock_action);
+	for_each_possible_cpu(cpu)
+		per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-- 
cgit v1.2.3-55-g7522


From db4b5525caafd846ec20f95afbc6403c792e22cf Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:39 -0700
Subject: x86: apic_64.c - setup_APIC_timer has to be __cpuinit function

There is no need to hold this code if CPU_HOTPLUG is not
defined.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index cd860856a0b7..8f551276681e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -400,7 +400,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-- 
cgit v1.2.3-55-g7522


From 7c37e48b5125fdb0acac846a0a3af42806175d44 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:40 -0700
Subject: x86: apic - introduce get_physical_broadcast for 64bit

We don't really use it now on 64bit mode but
could reserve it for future.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 8f551276681e..bb46711a0b1e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -244,6 +244,16 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+	return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
  */
-- 
cgit v1.2.3-55-g7522


From 920fa7a507c3b1004a9ebe07a2c9d38605b3406a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:41 -0700
Subject: x86: apic - unify setup_apicpmtimer

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 10 ++++++++++
 arch/x86/kernel/apic_64.c |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 21c831d96af3..df6ed603e547 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1774,6 +1774,16 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
+#ifdef CONFIG_X86_64
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bb46711a0b1e..ddc5b245faf2 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1810,6 +1810,7 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
+#ifdef CONFIG_X86_64
 static __init int setup_apicpmtimer(char *s)
 {
 	apic_calibrate_pmtmr = 1;
@@ -1817,6 +1818,7 @@ static __init int setup_apicpmtimer(char *s)
 	return 0;
 }
 __setup("apicpmtimer", setup_apicpmtimer);
+#endif
 
 static int __init apic_set_verbosity(char *arg)
 {
-- 
cgit v1.2.3-55-g7522


From 80e5609cabd7e4321769701a70297f819a15b08d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:42 -0700
Subject: x86: apic_64.c - add sanity check for spurious vector definition

Do not check for SPUTIOUS_APIC_VECTOR definition twice.
Check it once - is what we need.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index ddc5b245faf2..4587e16f73ec 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -46,6 +46,13 @@
 #include <mach_ipi.h>
 #include <mach_apic.h>
 
+/*
+ * Sanity check
+ */
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
+# error SPURIOUS_APIC_VECTOR definition error
+#endif
+
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
@@ -939,8 +946,6 @@ void __cpuinit setup_local_APIC(void)
 	preempt_disable();
 	value = apic_read(APIC_LVR);
 
-	BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
 	/*
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
-- 
cgit v1.2.3-55-g7522


From 89c38c2867ebe37c4c5aee23e7fa1bffb025b171 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:43 -0700
Subject: x86: apic - unify setup_local_APIC

- remove useless read of APIC_LVR
- wrap with preempt_disable/enable
- check for integrated APIC just in place

v2: fix by Yinghai Lu.
	fix lapic_is_integrated using
	let 64-bit too have pic_mode

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 21 ++++++++++++++-----
 arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index df6ed603e547..f8c1251984e2 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1033,9 +1033,10 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned long value, integrated;
+	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
 	if (esr_disable) {
 		apic_write(APIC_ESR, 0);
@@ -1043,14 +1044,16 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 	}
+#endif
 
-	integrated = lapic_is_integrated();
+	preempt_disable();
 
 	/*
 	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
 	if (!apic_id_registered())
-		WARN_ON_ONCE(1);
+		BUG();
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1099,7 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
 	 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1120,13 @@ void __cpuinit setup_local_APIC(void)
 	 * See also the comment in end_level_ioapic_irq().  --macro
 	 */
 
-	/* Enable focus processor (bit==0) */
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
 	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1154,9 +1163,11 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!integrated)		/* 82489DX */
+	if (!lapic_is_integrated())		/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
+	preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 4587e16f73ec..283968d4e024 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -76,6 +76,8 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
  */
 unsigned int apic_verbosity;
 
+int pic_mode;
+
 /* Have we found an MP table */
 int smp_found_config;
 
@@ -943,8 +945,17 @@ void __cpuinit setup_local_APIC(void)
 	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
+	/* Pound the ESR really hard over the head with a big hammer - mbligh */
+	if (esr_disable) {
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+	}
+#endif
+
 	preempt_disable();
-	value = apic_read(APIC_LVR);
 
 	/*
 	 * Double-check whether this APIC is really registered.
@@ -997,7 +1008,34 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
-	/* We always use processor focus */
+#ifdef CONFIG_X86_32
+	/*
+	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+	 * certain networking cards. If high frequency interrupts are
+	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
+	 * entry is masked/unmasked at a high rate as well then sooner or
+	 * later IOAPIC line gets 'stuck', no more interrupts are received
+	 * from the device. If focus CPU is disabled then the hang goes
+	 * away, oh well :-(
+	 *
+	 * [ This bug can be reproduced easily with a level-triggered
+	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
+	 *   BX chipset. ]
+	 */
+	/*
+	 * Actually disabling the focus CPU check just makes the hang less
+	 * frequent as it makes the interrupt distributon model be more
+	 * like LRU than MRU (the short-term load is more even across CPUs).
+	 * See also the comment in end_level_ioapic_irq().  --macro
+	 */
+
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
+	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1016,14 +1054,14 @@ void __cpuinit setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
 	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && !value) {
+	if (!smp_processor_id() && (pic_mode || !value)) {
 		value = APIC_DM_EXTINT;
 		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-			    smp_processor_id());
+				smp_processor_id());
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
 		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-			    smp_processor_id());
+				smp_processor_id());
 	}
 	apic_write(APIC_LVT0, value);
 
@@ -1034,7 +1072,10 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
 	preempt_enable();
 }
 
-- 
cgit v1.2.3-55-g7522


From 457cc52d4670bcf1470606a108bbf35aac28eb7f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:44 -0700
Subject: x86: apic_32.c should use __cpuinit section

All callers are __init or __cpuinit so there is no need
to hold this code without CPU_HOTPLUG being set.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f8c1251984e2..4ca3717b3026 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -383,7 +383,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -652,7 +652,7 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer();
 }
@@ -1713,7 +1713,7 @@ static struct sys_device device_lapic = {
 	.cls	= &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
-- 
cgit v1.2.3-55-g7522


From 6460bc73aac970135104a0bc407c2c8b85394d59 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 24 Aug 2008 02:01:45 -0700
Subject: x86: apic - unify smp_apic_timer_interrupt

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 3 +++
 arch/x86/kernel/apic_64.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4ca3717b3026..913d9924b101 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -718,6 +718,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 283968d4e024..2e109119f647 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -625,7 +625,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
-- 
cgit v1.2.3-55-g7522


From b3c5117050e8028d48b2fa0ea09c7a50dd7f3414 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:46 -0700
Subject: x86: apic_xx.c order variables

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 46 +++++++++++++++++++++++----------------------
 arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 913d9924b101..a54512bea629 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -50,16 +50,37 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+#endif
 
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -1742,15 +1763,6 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
 
 static int __init setup_disableapic(char *arg)
 {
@@ -1788,16 +1800,6 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
-#ifdef CONFIG_X86_64
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 2e109119f647..c40a900e155b 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -53,16 +53,44 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
+#ifdef CONFIG_X86_32
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+#endif
+
+#ifdef CONFIG_X86_64
 static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
 int disable_x2apic;
 int x2apic;
-
 /* x2apic enabled before OS handover */
 int x2apic_preenabled;
 
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -113,8 +141,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static unsigned long apic_phys;
 
-unsigned long mp_lapic_addr;
-
 /*
  * Get the LAPIC version
  */
@@ -1858,16 +1884,6 @@ static int __init parse_nolapic_timer(char *arg)
 }
 early_param("nolapic_timer", parse_nolapic_timer);
 
-#ifdef CONFIG_X86_64
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
 static int __init apic_set_verbosity(char *arg)
 {
 	if (!arg)  {
-- 
cgit v1.2.3-55-g7522


From 49899eacce79ce39faf531dad3e00f771eba2eb1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:47 -0700
Subject: x86: use HAVE_X2APIC in apic_64.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index c40a900e155b..d3ec746aede4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -82,10 +82,23 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-int disable_x2apic;
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
 int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
 unsigned long mp_lapic_addr;
 int disable_apic;
@@ -228,6 +241,7 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
 static void x2apic_wait_icr_idle(void)
 {
 	/* no need to wait for icr idle in x2apic */
@@ -261,6 +275,7 @@ static struct apic_ops x2apic_ops = {
 	.wait_icr_idle = x2apic_wait_icr_idle,
 	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
 };
+#endif
 
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
@@ -1125,6 +1140,7 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
 void check_x2apic(void)
 {
 	int msr, msr2;
@@ -1243,6 +1259,7 @@ end:
 
 	return;
 }
+#endif /* HAVE_X2APIC */
 
 /*
  * Detect and enable local APICs on non-SMP boards.
@@ -1291,10 +1308,12 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
 	if (x2apic) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
+#endif
 
 	/*
 	 * If no local APIC can be found then set up a fake all
@@ -1335,8 +1354,9 @@ int __init APIC_init_uniprocessor(void)
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
 	}
-
+#ifdef HAVE_X2APIC
 	enable_IR_x2apic();
+#endif
 	setup_apic_routing();
 
 	verify_local_APIC();
@@ -1672,7 +1692,7 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
 	if (x2apic)
 		enable_x2apic();
 	else
@@ -1836,15 +1856,6 @@ __cpuinit int apic_is_clustered_box(void)
 	return (clusters > 2);
 }
 
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
 /*
  * APIC command line parameters
  */
-- 
cgit v1.2.3-55-g7522


From 3491998dd54f6d4ef7344518fe5463b299fdf537 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:48 -0700
Subject: x86: add hard_smp_prossor_id with MACRO in io_apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 7 +++++++
 arch/x86/kernel/apic_64.c | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a54512bea629..93718ffb9b9c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1600,6 +1600,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d3ec746aede4..eeb69838c2f8 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1612,10 +1612,12 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
 int hard_smp_processor_id(void)
 {
 	return read_apic_id();
 }
+#endif
 
 /*
  * Power management
-- 
cgit v1.2.3-55-g7522


From f28c0ae21d80ffd6eb0987901c5273843387e341 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:49 -0700
Subject: x86: make apic_32/64.c more like

except x2apic, detec_init_APIC, and calibrating_APIC_clock

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 125 +++++++++++++++++++++++++++++++++++++++++-----
 arch/x86/kernel/apic_64.c |  10 +++-
 2 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 93718ffb9b9c..bfac26a16099 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -66,6 +66,9 @@ static int __init parse_lapic(char *arg)
 	return 0;
 }
 early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
 #endif
 
 #ifdef CONFIG_X86_64
@@ -131,9 +134,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -240,6 +240,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -247,6 +248,7 @@ int get_physical_broadcast(void)
 {
 	return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -1291,6 +1293,32 @@ no_apic:
 	return -1;
 }
 
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long phys_addr;
+
+	/*
+	 * If no local APIC can be found then go out
+	 * : it means there is no mpatable and MADT
+	 */
+	if (!smp_found_config)
+		return;
+
+	phys_addr = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+		    APIC_BASE, phys_addr);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
+
 /**
  * init_apic_mappings - initialize APIC mappings
  */
@@ -1308,8 +1336,8 @@ void __init init_apic_mappings(void)
 		apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-	       apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+				APIC_BASE, apic_phys);
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
@@ -1317,14 +1345,12 @@ void __init init_apic_mappings(void)
 	 */
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
@@ -1682,11 +1708,6 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
 	{
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -1770,7 +1791,87 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+	int i, clusters, zeros;
+	unsigned id;
+	u16 *bios_cpu_apicid;
+	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+	/*
+	 * there is not this kind of box with AMD CPU yet.
+	 * Some AMD box with quadcore cpu and 8 sockets apicid
+	 * will be [4, 0x23] or [8, 0x27] could be thought to
+	 * vsmp box still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		/* are we being called early in kernel startup? */
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
 
+		if (id != BAD_APICID)
+			__set_bit(APIC_CLUSTERID(id), clustermap);
+	}
+
+	/* Problem:  Partially populated chassis may not have CPUs in some of
+	 * the APIC clusters they have been allocated.  Only present CPUs have
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
+	 */
+	clusters = 0;
+	zeros = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (test_bit(i, clustermap)) {
+			clusters += 1 + zeros;
+			zeros = 0;
+		} else
+			++zeros;
+	}
+
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
+	/*
+	 * If clusters > 2, then should be multi-chassis.
+	 * May have to revisit this when multi-core + hyperthreaded CPUs come
+	 * out, but AFAIK this will work even for them.
+	 */
+	return (clusters > 2);
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
 	disable_apic = 1;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index eeb69838c2f8..dca6c246e731 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -69,6 +69,9 @@ static int __init parse_lapic(char *arg)
 	return 0;
 }
 early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1279,6 +1282,7 @@ static int __init detect_init_APIC(void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
 {
 	unsigned long phys_addr;
@@ -1302,6 +1306,7 @@ void __init early_init_lapic_mapping(void)
 	 */
 	boot_cpu_physical_apicid = read_apic_id();
 }
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
@@ -1334,7 +1339,8 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = read_apic_id();
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -1782,6 +1788,7 @@ static void apic_pm_activate(void) { }
 
 #endif	/* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
  * apic_is_clustered_box() -- Check if we can expect good TSC
  *
@@ -1857,6 +1864,7 @@ __cpuinit int apic_is_clustered_box(void)
 	 */
 	return (clusters > 2);
 }
+#endif
 
 /*
  * APIC command line parameters
-- 
cgit v1.2.3-55-g7522


From fa2bd35a8d5c88c03b638c72daf7f38a132d0e8c Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:50 -0700
Subject: x86: merge APIC_init_uniprocessor

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 51 +++++++++++++++++++++++++++++++++++++++++------
 arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index bfac26a16099..8f8b0e1f3eb3 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1355,6 +1355,17 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
 
@@ -1368,34 +1379,62 @@ int __init APIC_init_uniprocessor(void)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
+#endif
 
+#ifdef HAVE_X2APIC
+	enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+	setup_apic_routing();
+#endif
 	verify_local_APIC();
-
 	connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
 	/*
 	 * Hack: In case of kdump, after a crash, kernel might be booting
 	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
 	boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config)
-		if (!skip_ioapic_setup && nr_ioapics)
-			setup_IO_APIC();
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+# ifdef CONFIG_X86_64
+	else
+		nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+#else
 	setup_boot_clock();
+#endif
 
 	return 0;
 }
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index dca6c246e731..16a30c22b07c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1351,6 +1351,7 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
 	if (disable_apic) {
 		printk(KERN_INFO "Apic disabled\n");
 		return -1;
@@ -1360,37 +1361,78 @@ int __init APIC_init_uniprocessor(void)
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
 	}
+#else
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic &&
+	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		       boot_cpu_physical_apicid);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+		return -1;
+	}
+#endif
+
 #ifdef HAVE_X2APIC
 	enable_IR_x2apic();
 #endif
+#ifdef CONFIG_X86_64
 	setup_apic_routing();
+#endif
 
 	verify_local_APIC();
-
 	connect_bsp_APIC();
 
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+#ifdef CONFIG_X86_64
 	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-
+#else
+	/*
+	 * Hack: In case of kdump, after a crash, kernel might be booting
+	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+	 * might be zero if read from MP tables. Get it from LAPIC.
+	 */
+# ifdef CONFIG_CRASH_DUMP
+	boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
 	/*
 	 * Now enable IO-APICs, actually call clear_IO_APIC
 	 * We need clear_IO_APIC before enabling vector on BP
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		enable_IO_APIC();
+#endif
 
+#ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+#endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
 
+#ifdef CONFIG_X86_IO_APIC
 	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
+# ifdef CONFIG_X86_64
 	else
 		nr_ioapics = 0;
+# endif
+#endif
+
+#ifdef CONFIG_X86_64
 	setup_boot_APIC_clock();
 	check_nmi_watchdog();
+#else
+	setup_boot_clock();
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.3-55-g7522


From be7a656fe131cba088912bcafb079b029320504d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:51 -0700
Subject: x86: copy detect_init_APIC to the other

Signed-off-by: Yinghai Lu <yhlu.kernel@mgail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 20 ++++++++++++
 arch/x86/kernel/apic_64.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8f8b0e1f3eb3..1103e05aa1ba 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1214,6 +1214,25 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_physical_apicid = 0;
+	return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1292,6 +1311,7 @@ no_apic:
 	printk(KERN_INFO "No local APIC present or hardware disabled\n");
 	return -1;
 }
+#endif
 
 #ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 16a30c22b07c..b7268f5c8b18 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -684,7 +684,6 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-
 /*
  * Local APIC start and shutdown
  */
@@ -1264,6 +1263,7 @@ end:
 }
 #endif /* HAVE_X2APIC */
 
+#ifdef CONFIG_X86_64
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -1281,6 +1281,86 @@ static int __init detect_init_APIC(void)
 	boot_cpu_physical_apicid = 0;
 	return 0;
 }
+#else
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+	u32 h, l, features;
+
+	/* Disabled by kernel option? */
+	if (disable_apic)
+		return -1;
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+		    (boot_cpu_data.x86 == 15))
+			break;
+		goto no_apic;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+			break;
+		goto no_apic;
+	default:
+		goto no_apic;
+	}
+
+	if (!cpu_has_apic) {
+		/*
+		 * Over-ride BIOS and try to enable the local APIC only if
+		 * "lapic" specified.
+		 */
+		if (!force_enable_local_apic) {
+			printk(KERN_INFO "Local APIC disabled by BIOS -- "
+			       "you can enable it with \"lapic\"\n");
+			return -1;
+		}
+		/*
+		 * Some BIOSes disable the local APIC in the APIC_BASE
+		 * MSR. This can only be done in software for Intel P6 or later
+		 * and AMD K7 (Model > 1) or later.
+		 */
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			printk(KERN_INFO
+			       "Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
+	}
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		printk(KERN_WARNING "Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	printk(KERN_INFO "Found and enabled local APIC!\n");
+
+	apic_pm_activate();
+
+	return 0;
+
+no_apic:
+	printk(KERN_INFO "No local APIC present or hardware disabled\n");
+	return -1;
+}
+#endif
 
 #ifdef CONFIG_X86_64
 void __init early_init_lapic_mapping(void)
-- 
cgit v1.2.3-55-g7522


From 773763df7de881e65ff2600c024c9ce2dde64750 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:52 -0700
Subject: x86: merge header files in apic_xx.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 8 ++++++++
 arch/x86/kernel/apic_64.c | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 1103e05aa1ba..0ec8321e687c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -23,11 +23,13 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -36,8 +38,14 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index b7268f5c8b18..ebe417b4d7fc 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -24,9 +24,11 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/ioport.h>
+#include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 #include <linux/dmar.h>
 
 #include <asm/atomic.h>
@@ -34,8 +36,10 @@
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/desc.h>
+#include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
+#include <asm/i8253.h>
 #include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
@@ -43,8 +47,9 @@
 #include <asm/apic.h>
 #include <asm/i8259.h>
 
-#include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
 
 /*
  * Sanity check
-- 
cgit v1.2.3-55-g7522


From dc1528dd864a0b79fa67b60b3ca5674fe94fdce5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:53 -0700
Subject: x86: apic unify smp_spurious/error_interrupt

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 24 +++++++++++++++++++++---
 arch/x86/kernel/apic_64.c | 24 ++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 0ec8321e687c..60a901b2d4fe 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1474,10 +1474,17 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_spurious_interrupt(void)
+#else
 void smp_spurious_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned long v;
+	u32 v;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1488,20 +1495,31 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+	add_pda(irq_spurious_count, 1);
+#else
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
 	       "should never happen.\n", smp_processor_id());
 	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_error_interrupt(void)
+#else
 void smp_error_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned long v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
@@ -1520,7 +1538,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index ebe417b4d7fc..c728885e4f4a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1528,10 +1528,17 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
 asmlinkage void smp_spurious_interrupt(void)
+#else
+void smp_spurious_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned int v;
+	u32 v;
+
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1542,18 +1549,31 @@ asmlinkage void smp_spurious_interrupt(void)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
 	add_pda(irq_spurious_count, 1);
+#else
+	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
+	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
+	       "should never happen.\n", smp_processor_id());
+	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
+#ifdef CONFIG_X86_64
 asmlinkage void smp_error_interrupt(void)
+#else
+void smp_error_interrupt(struct pt_regs *regs)
+#endif
 {
-	unsigned int v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
 	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
-- 
cgit v1.2.3-55-g7522


From 2f04fa888d270951b9e0fe9e641ddd560d77ad1b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:54 -0700
Subject: x86: apic copy calibrate_APIC_clock to each other in apic_32/64.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c |  86 +++++++++++++++++++
 arch/x86/kernel/apic_64.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 301 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 60a901b2d4fe..05498c37f8d9 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -424,6 +424,90 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+#define TICK_COUNT 100000000
+
+static int __init calibrate_APIC_clock(void)
+{
+	unsigned apic, apic_start;
+	unsigned long tsc, tsc_start;
+	int result;
+
+	local_irq_disable();
+
+	/*
+	 * Put whatever arbitrary (but long enough) timeout
+	 * value into the APIC clock, we just want to get the
+	 * counter running for calibration.
+	 *
+	 * No interrupt enable !
+	 */
+	__setup_APIC_LVTT(250000000, 0, 0);
+
+	apic_start = apic_read(APIC_TMCCT);
+#ifdef CONFIG_X86_PM_TIMER
+	if (apic_calibrate_pmtmr && pmtmr_ioport) {
+		pmtimer_wait(5000);  /* 5ms wait */
+		apic = apic_read(APIC_TMCCT);
+		result = (apic_start - apic) * 1000L / 5;
+	} else
+#endif
+	{
+		rdtscll(tsc_start);
+
+		do {
+			apic = apic_read(APIC_TMCCT);
+			rdtscll(tsc);
+		} while ((tsc - tsc_start) < TICK_COUNT &&
+				(apic_start - apic) < TICK_COUNT);
+
+		result = (apic_start - apic) * 1000L * tsc_khz /
+					(tsc - tsc_start);
+	}
+
+	local_irq_enable();
+
+	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
+
+	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
+		result / 1000 / 1000, result / 1000 % 1000);
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (result * APIC_DIVISOR) / HZ;
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		printk(KERN_WARNING
+			"APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+#else
 /*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
@@ -635,6 +719,8 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
+#endif
+
 /*
  * Setup the boot APIC
  *
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index c728885e4f4a..6e99af5ce678 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -478,6 +478,7 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
+#ifdef CONFIG_X86_64
 /*
  * In this function we calibrate APIC bus clocks to the external
  * timer. Unfortunately we cannot use jiffies and the timer irq
@@ -560,6 +561,220 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
+#else
+/*
+ * In this functions we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS		(HZ/10)
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+	unsigned long long tsc = 0;
+	long tapic = apic_read(APIC_TMCCT);
+	unsigned long pm = acpi_pm_read_early();
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	switch (lapic_cal_loops++) {
+	case 0:
+		lapic_cal_t1 = tapic;
+		lapic_cal_tsc1 = tsc;
+		lapic_cal_pm1 = pm;
+		lapic_cal_j1 = jiffies;
+		break;
+
+	case LAPIC_CAL_LOOPS:
+		lapic_cal_t2 = tapic;
+		lapic_cal_tsc2 = tsc;
+		if (pm < lapic_cal_pm1)
+			pm += ACPI_PM_OVRRUN;
+		lapic_cal_pm2 = pm;
+		lapic_cal_j2 = jiffies;
+		break;
+	}
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
+	const long pm_thresh = pm_100ms/100;
+	void (*real_handler)(struct clock_event_device *dev);
+	unsigned long deltaj;
+	long delta, deltapm;
+	int pm_referenced = 0;
+
+	local_irq_disable();
+
+	/* Replace the global interrupt handler */
+	real_handler = global_clock_event->event_handler;
+	global_clock_event->event_handler = lapic_cal_handler;
+
+	/*
+	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * can underflow in the 100ms detection time frame
+	 */
+	__setup_APIC_LVTT(1000000000, 0, 0);
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		cpu_relax();
+
+	local_irq_disable();
+
+	/* Restore the real event handler */
+	global_clock_event->event_handler = real_handler;
+
+	/* Build delta t1-t2 as apic timer counts down */
+	delta = lapic_cal_t1 - lapic_cal_t2;
+	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+	/* Check, if the PM timer is available */
+	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	if (deltapm) {
+		unsigned long mult;
+		u64 res;
+
+		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+		if (deltapm > (pm_100ms - pm_thresh) &&
+		    deltapm < (pm_100ms + pm_thresh)) {
+			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+		} else {
+			res = (((u64) deltapm) *  mult) >> 22;
+			do_div(res, 1000000);
+			printk(KERN_WARNING "APIC calibration not consistent "
+			       "with PM Timer: %ldms instead of 100ms\n",
+			       (long)res);
+			/* Correct the lapic counter value */
+			res = (((u64) delta) * pm_100ms);
+			do_div(res, deltapm);
+			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			       "%lu (%ld)\n", (unsigned long) res, delta);
+			delta = (long) res;
+		}
+		pm_referenced = 1;
+	}
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+		    calibration_result);
+
+	if (cpu_has_tsc) {
+		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			    "%ld.%04ld MHz.\n",
+			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+	}
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		    "%u.%04u MHz.\n",
+		    calibration_result / (1000000 / HZ),
+		    calibration_result % (1000000 / HZ));
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		local_irq_enable();
+		printk(KERN_WARNING
+		       "APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/* We trust the pm timer based calibration */
+	if (!pm_referenced) {
+		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+		/*
+		 * Setup the apic timer manually
+		 */
+		levt->event_handler = lapic_cal_handler;
+		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+		lapic_cal_loops = -1;
+
+		/* Let the interrupts run */
+		local_irq_enable();
+
+		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+			cpu_relax();
+
+		local_irq_disable();
+
+		/* Stop the lapic timer */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+		local_irq_enable();
+
+		/* Jiffies delta */
+		deltaj = lapic_cal_j2 - lapic_cal_j1;
+		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+		/* Check, if the jiffies result is consistent */
+		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+		else
+			levt->features |= CLOCK_EVT_FEAT_DUMMY;
+	} else
+		local_irq_enable();
+
+	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+		printk(KERN_WARNING
+		       "APIC timer disabled due to verification failure.\n");
+			return -1;
+	}
+
+	return 0;
+}
+
+#endif
+
 /*
  * Setup the boot APIC
  *
-- 
cgit v1.2.3-55-g7522


From 6c15822752a13748241e1a34068f2b15b660d28e Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:55 -0700
Subject: x86: apic copy apic_64.c to apic_32.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 05498c37f8d9..0b11d6e3f091 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -90,6 +90,24 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
 unsigned long mp_lapic_addr;
 int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
@@ -231,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1308,6 +1362,127 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+#endif /* HAVE_X2APIC */
+
 #ifdef CONFIG_X86_64
 /*
  * Detect and enable local APICs on non-SMP boards.
@@ -1438,6 +1613,13 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+#endif
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -1501,6 +1683,7 @@ int __init APIC_init_uniprocessor(void)
 #ifdef CONFIG_X86_64
 	setup_apic_routing();
 #endif
+
 	verify_local_APIC();
 	connect_bsp_APIC();
 
@@ -1879,6 +2062,11 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
+#ifdef HAVE_X2APIC
+	if (x2apic)
+		enable_x2apic();
+	else
+#endif
 	{
 		/*
 		 * Make sure the APICBASE points to the right address
-- 
cgit v1.2.3-55-g7522


From 0f611ffaea81b8e1c69682188ba1ccaf7683a2ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 02:01:56 -0700
Subject: x86: rename apic_32.c and apic_64.c to apic.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile  |    2 +-
 arch/x86/kernel/apic.c    | 2311 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/apic_32.c | 2312 ---------------------------------------------
 arch/x86/kernel/apic_64.c | 2311 --------------------------------------------
 4 files changed, 2312 insertions(+), 4624 deletions(-)
 create mode 100644 arch/x86/kernel/apic.c
 delete mode 100644 arch/x86/kernel/apic_32.c
 delete mode 100644 arch/x86/kernel/apic_64.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7264f9c3af8f..45cecc59d894 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
new file mode 100644
index 000000000000..6e99af5ce678
--- /dev/null
+++ b/arch/x86/kernel/apic.c
@@ -0,0 +1,2311 @@
+/*
+ *	Local APIC handling, local APIC timers
+ *
+ *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively.
+ *	Maciej W. Rozycki	:	Various updates and fixes.
+ *	Mikael Pettersson	:	Power Management for UP-APIC.
+ *	Pavel Machek and
+ *	Mikael Pettersson	:	PM converted to driver model.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/ioport.h>
+#include <linux/cpu.h>
+#include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/dmar.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+#include <asm/pgalloc.h>
+#include <asm/i8253.h>
+#include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+
+#include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
+
+/*
+ * Sanity check
+ */
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
+# error SPURIOUS_APIC_VECTOR definition error
+#endif
+
+#ifdef CONFIG_X86_32
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __cpuinitdata;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
+
+int first_system_vector = 0xfe;
+
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+
+/*
+ * Debug level, exported for io_apic.c
+ */
+unsigned int apic_verbosity;
+
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+static unsigned int calibration_result;
+
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt);
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt);
+static void lapic_timer_broadcast(cpumask_t mask);
+static void apic_pm_activate(void);
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+	.name		= "lapic",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+	.shift		= 32,
+	.set_mode	= lapic_timer_setup,
+	.set_next_event	= lapic_next_event,
+	.broadcast	= lapic_timer_broadcast,
+	.rating		= 100,
+	.irq		= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+static unsigned long apic_phys;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+	return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a separate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+#ifdef CONFIG_X86_64
+	return 1;
+#else
+	return APIC_INTEGRATED(lapic_get_version());
+#endif
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
+static int modern_apic(void)
+{
+	/* AMD systems use old APIC versions, so check the CPU */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+	    boot_cpu_data.x86 >= 0xf)
+		return 1;
+	return lapic_get_version() >= 0x14;
+}
+
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
+{
+	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+		cpu_relax();
+}
+
+u32 safe_xapic_wait_icr_idle(void)
+{
+	u32 send_status;
+	int timeout;
+
+	timeout = 0;
+	do {
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+		if (!send_status)
+			break;
+		udelay(100);
+	} while (timeout++ < 1000);
+
+	return send_status;
+}
+
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
+/**
+ * enable_NMI_through_LVT0 - enable NMI through local vector table 0
+ */
+void __cpuinit enable_NMI_through_LVT0(void)
+{
+	unsigned int v;
+
+	/* unmask and set to NMI */
+	v = APIC_DM_NMI;
+
+	/* Level triggered for 82489DX (32bit mode) */
+	if (!lapic_is_integrated())
+		v |= APIC_LVT_LEVEL_TRIGGER;
+
+	apic_write(APIC_LVT0, v);
+}
+
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+	return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVR);
+	/*
+	 * - we always have APIC integrated on 64bit mode
+	 * - 82489DXs do not report # of LVT entries
+	 */
+	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+}
+
+/*
+ * Local APIC timer
+ */
+
+/* Clock divisor */
+#ifdef CONFG_X86_64
+#define APIC_DIVISOR 1
+#else
+#define APIC_DIVISOR 16
+#endif
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+	unsigned int lvtt_value, tmp_value;
+
+	lvtt_value = LOCAL_TIMER_VECTOR;
+	if (!oneshot)
+		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	if (!lapic_is_integrated())
+		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+	if (!irqen)
+		lvtt_value |= APIC_LVT_MASKED;
+
+	apic_write(APIC_LVTT, lvtt_value);
+
+	/*
+	 * Divide PICLK by 16
+	 */
+	tmp_value = apic_read(APIC_TDCR);
+	apic_write(APIC_TDCR,
+		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+		APIC_TDR_DIV_16);
+
+	if (!oneshot)
+		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
+}
+
+/*
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs.
+ */
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+
+	apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_IBS;
+}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
+
+/*
+ * Program the next event, relative to now
+ */
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt)
+{
+	apic_write(APIC_TMICT, delta);
+	return 0;
+}
+
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int v;
+
+	/* Lapic used as dummy for broadcast ? */
+	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+		return;
+
+	local_irq_save(flags);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		__setup_APIC_LVTT(calibration_result,
+				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		v = apic_read(APIC_LVTT);
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, v);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void lapic_timer_broadcast(cpumask_t mask)
+{
+#ifdef CONFIG_SMP
+	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#endif
+}
+
+/*
+ * Setup the local APIC timer for this CPU. Copy the initilized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void __cpuinit setup_APIC_timer(void)
+{
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+
+	memcpy(levt, &lapic_clockevent, sizeof(*levt));
+	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+
+	clockevents_register_device(levt);
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs syncron. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
+ */
+
+#define TICK_COUNT 100000000
+
+static int __init calibrate_APIC_clock(void)
+{
+	unsigned apic, apic_start;
+	unsigned long tsc, tsc_start;
+	int result;
+
+	local_irq_disable();
+
+	/*
+	 * Put whatever arbitrary (but long enough) timeout
+	 * value into the APIC clock, we just want to get the
+	 * counter running for calibration.
+	 *
+	 * No interrupt enable !
+	 */
+	__setup_APIC_LVTT(250000000, 0, 0);
+
+	apic_start = apic_read(APIC_TMCCT);
+#ifdef CONFIG_X86_PM_TIMER
+	if (apic_calibrate_pmtmr && pmtmr_ioport) {
+		pmtimer_wait(5000);  /* 5ms wait */
+		apic = apic_read(APIC_TMCCT);
+		result = (apic_start - apic) * 1000L / 5;
+	} else
+#endif
+	{
+		rdtscll(tsc_start);
+
+		do {
+			apic = apic_read(APIC_TMCCT);
+			rdtscll(tsc);
+		} while ((tsc - tsc_start) < TICK_COUNT &&
+				(apic_start - apic) < TICK_COUNT);
+
+		result = (apic_start - apic) * 1000L * tsc_khz /
+					(tsc - tsc_start);
+	}
+
+	local_irq_enable();
+
+	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
+
+	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
+		result / 1000 / 1000, result / 1000 % 1000);
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (result * APIC_DIVISOR) / HZ;
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		printk(KERN_WARNING
+			"APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+#else
+/*
+ * In this functions we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS		(HZ/10)
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+	unsigned long long tsc = 0;
+	long tapic = apic_read(APIC_TMCCT);
+	unsigned long pm = acpi_pm_read_early();
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	switch (lapic_cal_loops++) {
+	case 0:
+		lapic_cal_t1 = tapic;
+		lapic_cal_tsc1 = tsc;
+		lapic_cal_pm1 = pm;
+		lapic_cal_j1 = jiffies;
+		break;
+
+	case LAPIC_CAL_LOOPS:
+		lapic_cal_t2 = tapic;
+		lapic_cal_tsc2 = tsc;
+		if (pm < lapic_cal_pm1)
+			pm += ACPI_PM_OVRRUN;
+		lapic_cal_pm2 = pm;
+		lapic_cal_j2 = jiffies;
+		break;
+	}
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
+	const long pm_thresh = pm_100ms/100;
+	void (*real_handler)(struct clock_event_device *dev);
+	unsigned long deltaj;
+	long delta, deltapm;
+	int pm_referenced = 0;
+
+	local_irq_disable();
+
+	/* Replace the global interrupt handler */
+	real_handler = global_clock_event->event_handler;
+	global_clock_event->event_handler = lapic_cal_handler;
+
+	/*
+	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * can underflow in the 100ms detection time frame
+	 */
+	__setup_APIC_LVTT(1000000000, 0, 0);
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		cpu_relax();
+
+	local_irq_disable();
+
+	/* Restore the real event handler */
+	global_clock_event->event_handler = real_handler;
+
+	/* Build delta t1-t2 as apic timer counts down */
+	delta = lapic_cal_t1 - lapic_cal_t2;
+	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+	/* Check, if the PM timer is available */
+	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	if (deltapm) {
+		unsigned long mult;
+		u64 res;
+
+		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+		if (deltapm > (pm_100ms - pm_thresh) &&
+		    deltapm < (pm_100ms + pm_thresh)) {
+			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+		} else {
+			res = (((u64) deltapm) *  mult) >> 22;
+			do_div(res, 1000000);
+			printk(KERN_WARNING "APIC calibration not consistent "
+			       "with PM Timer: %ldms instead of 100ms\n",
+			       (long)res);
+			/* Correct the lapic counter value */
+			res = (((u64) delta) * pm_100ms);
+			do_div(res, deltapm);
+			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			       "%lu (%ld)\n", (unsigned long) res, delta);
+			delta = (long) res;
+		}
+		pm_referenced = 1;
+	}
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+		    calibration_result);
+
+	if (cpu_has_tsc) {
+		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			    "%ld.%04ld MHz.\n",
+			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+	}
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		    "%u.%04u MHz.\n",
+		    calibration_result / (1000000 / HZ),
+		    calibration_result % (1000000 / HZ));
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (calibration_result < (1000000 / HZ)) {
+		local_irq_enable();
+		printk(KERN_WARNING
+		       "APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/* We trust the pm timer based calibration */
+	if (!pm_referenced) {
+		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+		/*
+		 * Setup the apic timer manually
+		 */
+		levt->event_handler = lapic_cal_handler;
+		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+		lapic_cal_loops = -1;
+
+		/* Let the interrupts run */
+		local_irq_enable();
+
+		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+			cpu_relax();
+
+		local_irq_disable();
+
+		/* Stop the lapic timer */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+		local_irq_enable();
+
+		/* Jiffies delta */
+		deltaj = lapic_cal_j2 - lapic_cal_j1;
+		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+		/* Check, if the jiffies result is consistent */
+		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+		else
+			levt->features |= CLOCK_EVT_FEAT_DUMMY;
+	} else
+		local_irq_enable();
+
+	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+		printk(KERN_WARNING
+		       "APIC timer disabled due to verification failure.\n");
+			return -1;
+	}
+
+	return 0;
+}
+
+#endif
+
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
+{
+	/*
+	 * The local apic timer can be disabled via the kernel
+	 * commandline or from the CPU detection code. Register the lapic
+	 * timer as a dummy clock event source on SMP systems, so the
+	 * broadcast mechanism is used. On UP systems simply ignore it.
+	 */
+	if (disable_apic_timer) {
+		printk(KERN_INFO "Disabling APIC timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1) {
+			lapic_clockevent.mult = 1;
+			setup_APIC_timer();
+		}
+		return;
+	}
+
+	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+		    "calibrating APIC timer ...\n");
+
+	if (calibrate_APIC_clock()) {
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
+
+	/*
+	 * If nmi_watchdog is set to IO_APIC, we need the
+	 * PIT/HPET going.  Otherwise register lapic as a dummy
+	 * device.
+	 */
+	if (nmi_watchdog != NMI_IO_APIC)
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+	else
+		printk(KERN_WARNING "APIC timer registered as dummy,"
+			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+
+	/* Setup the lapic or request the broadcast */
+	setup_APIC_timer();
+}
+
+void __cpuinit setup_secondary_APIC_clock(void)
+{
+	setup_APIC_timer();
+}
+
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+	/*
+	 * Normally we should not be here till LAPIC has been initialized but
+	 * in some cases like kdump, its possible that there is a pending LAPIC
+	 * timer interrupt from previous kernel's context and is delivered in
+	 * new kernel the moment interrupts are enabled.
+	 *
+	 * Interrupts are enabled early and LAPIC is setup much later, hence
+	 * its possible that when we get here evt->event_handler is NULL.
+	 * Check for event_handler being NULL and discard the interrupt as
+	 * spurious.
+	 */
+	if (!evt->event_handler) {
+		printk(KERN_WARNING
+		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		/* Switch it off */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+		return;
+	}
+
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
+#ifdef CONFIG_X86_64
+	add_pda(apic_timer_irqs, 1);
+#else
+	per_cpu(irq_stat, cpu).apic_timer_irqs++;
+#endif
+
+	evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+	local_apic_timer_interrupt();
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
+ * leftovers during boot.
+ */
+void clear_local_APIC(void)
+{
+	int maxlvt;
+	u32 v;
+
+	/* APIC hasn't been mapped yet */
+	if (!apic_phys)
+		return;
+
+	maxlvt = lapic_get_maxlvt();
+	/*
+	 * Masking an LVT entry can trigger a local APIC error
+	 * if the vector is zero. Mask LVTERR first to prevent this.
+	 */
+	if (maxlvt >= 3) {
+		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	}
+	/*
+	 * Careful: we have to set masks only first to deassert
+	 * any level-triggered sources.
+	 */
+	v = apic_read(APIC_LVTT);
+	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT1);
+	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
+	if (maxlvt >= 4) {
+		v = apic_read(APIC_LVTPC);
+		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
+	}
+
+	/* lets not touch this if we didn't frob it */
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
+	if (maxlvt >= 5) {
+		v = apic_read(APIC_LVTTHMR);
+		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+	}
+#endif
+	/*
+	 * Clean APIC state for other OSs:
+	 */
+	apic_write(APIC_LVTT, APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED);
+	apic_write(APIC_LVT1, APIC_LVT_MASKED);
+	if (maxlvt >= 3)
+		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
+
+	/* Integrated APIC (!82489DX) ? */
+	if (lapic_is_integrated()) {
+		if (maxlvt > 3)
+			/* Clear ESR due to Pentium errata 3AP and 11AP */
+			apic_write(APIC_ESR, 0);
+		apic_read(APIC_ESR);
+	}
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+	unsigned int value;
+
+	clear_local_APIC();
+
+	/*
+	 * Disable APIC (implies clearing of registers
+	 * for 82489DX!).
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_SPIV_APIC_ENABLED;
+	apic_write(APIC_SPIV, value);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
+	 * restore the disabled state.
+	 */
+	if (enabled_via_apicbase) {
+		unsigned int l, h;
+
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_ENABLE;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	}
+#endif
+}
+
+/*
+ * If Linux enabled the LAPIC against the BIOS default disable it down before
+ * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
+ * not power-off.  Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
+ */
+void lapic_shutdown(void)
+{
+	unsigned long flags;
+
+	if (!cpu_has_apic)
+		return;
+
+	local_irq_save(flags);
+
+#ifdef CONFIG_X86_32
+	if (!enabled_via_apicbase)
+		clear_local_APIC();
+	else
+#endif
+		disable_local_APIC();
+
+
+	local_irq_restore(flags);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+	unsigned int reg0, reg1;
+
+	/*
+	 * The version register is read-only in a real APIC.
+	 */
+	reg0 = apic_read(APIC_LVR);
+	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
+	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+	reg1 = apic_read(APIC_LVR);
+	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
+
+	/*
+	 * The two version reads above should print the same
+	 * numbers.  If the second one is different, then we
+	 * poke at a non-APIC.
+	 */
+	if (reg1 != reg0)
+		return 0;
+
+	/*
+	 * Check if the version looks reasonably.
+	 */
+	reg1 = GET_APIC_VERSION(reg0);
+	if (reg1 == 0x00 || reg1 == 0xff)
+		return 0;
+	reg1 = lapic_get_maxlvt();
+	if (reg1 < 0x02 || reg1 == 0xff)
+		return 0;
+
+	/*
+	 * The ID register is read/write in a real APIC.
+	 */
+	reg0 = apic_read(APIC_ID);
+	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
+	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+	reg1 = apic_read(APIC_ID);
+	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
+	apic_write(APIC_ID, reg0);
+	if (reg1 != (reg0 ^ APIC_ID_MASK))
+		return 0;
+
+	/*
+	 * The next two are just to see if we have sane values.
+	 * They're only really relevant if we're in Virtual Wire
+	 * compatibility mode, but most boxes are anymore.
+	 */
+	reg0 = apic_read(APIC_LVT0);
+	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
+	reg1 = apic_read(APIC_LVT1);
+	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
+
+	return 1;
+}
+
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
+void __init sync_Arb_IDs(void)
+{
+	/*
+	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
+	 * needed on AMD.
+	 */
+	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+
+	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
+	apic_write(APIC_ICR, APIC_DEST_ALLINC |
+			APIC_INT_LEVELTRIG | APIC_DM_INIT);
+}
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !cpu_has_apic)
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write(APIC_LVT1, value);
+}
+
+static void __cpuinit lapic_setup_esr(void)
+{
+	unsigned long oldvalue, value, maxlvt;
+	if (lapic_is_integrated() && !esr_disable) {
+		if (esr_disable) {
+			/*
+			 * Something untraceable is creating bad interrupts on
+			 * secondary quads ... for the moment, just leave the
+			 * ESR disabled - we can't do anything useful with the
+			 * errors anyway - mbligh
+			 */
+			printk(KERN_INFO "Leaving ESR disabled.\n");
+			return;
+		}
+		/* !82489DX */
+		maxlvt = lapic_get_maxlvt();
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+		oldvalue = apic_read(APIC_ESR);
+
+		/* enables sending errors */
+		value = ERROR_APIC_VECTOR;
+		apic_write(APIC_LVTERR, value);
+		/*
+		 * spec says clear errors after enabling vector.
+		 */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+		value = apic_read(APIC_ESR);
+		if (value != oldvalue)
+			apic_printk(APIC_VERBOSE, "ESR value before enabling "
+				"vector: 0x%08lx  after: 0x%08lx\n",
+				oldvalue, value);
+	} else {
+		printk(KERN_INFO "No ESR for 82489DX.\n");
+	}
+}
+
+
+/**
+ * setup_local_APIC - setup the local APIC
+ */
+void __cpuinit setup_local_APIC(void)
+{
+	unsigned int value;
+	int i, j;
+
+#ifdef CONFIG_X86_32
+	/* Pound the ESR really hard over the head with a big hammer - mbligh */
+	if (esr_disable) {
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+	}
+#endif
+
+	preempt_disable();
+
+	/*
+	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
+	 */
+	if (!apic_id_registered())
+		BUG();
+
+	/*
+	 * Intel recommends to set DFR, LDR and TPR before enabling
+	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+	 * document number 292116).  So here it goes...
+	 */
+	init_apic_ldr();
+
+	/*
+	 * Set Task Priority to 'accept all'. We never change this
+	 * later on.
+	 */
+	value = apic_read(APIC_TASKPRI);
+	value &= ~APIC_TPRI_MASK;
+	apic_write(APIC_TASKPRI, value);
+
+	/*
+	 * After a crash, we no longer service the interrupts and a pending
+	 * interrupt from previous kernel might still have ISR bit set.
+	 *
+	 * Most probably by now CPU has serviced that pending interrupt and
+	 * it might not have done the ack_APIC_irq() because it thought,
+	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
+	 * does not clear the ISR bit and cpu thinks it has already serivced
+	 * the interrupt. Hence a vector might get locked. It was noticed
+	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+	 */
+	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+		value = apic_read(APIC_ISR + i*0x10);
+		for (j = 31; j >= 0; j--) {
+			if (value & (1<<j))
+				ack_APIC_irq();
+		}
+	}
+
+	/*
+	 * Now that we are all set up, enable the APIC
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	/*
+	 * Enable APIC
+	 */
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+	 * certain networking cards. If high frequency interrupts are
+	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
+	 * entry is masked/unmasked at a high rate as well then sooner or
+	 * later IOAPIC line gets 'stuck', no more interrupts are received
+	 * from the device. If focus CPU is disabled then the hang goes
+	 * away, oh well :-(
+	 *
+	 * [ This bug can be reproduced easily with a level-triggered
+	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
+	 *   BX chipset. ]
+	 */
+	/*
+	 * Actually disabling the focus CPU check just makes the hang less
+	 * frequent as it makes the interrupt distributon model be more
+	 * like LRU than MRU (the short-term load is more even across CPUs).
+	 * See also the comment in end_level_ioapic_irq().  --macro
+	 */
+
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
+	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
+
+	/*
+	 * Set spurious IRQ vector
+	 */
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up LVT0, LVT1:
+	 *
+	 * set up through-local-APIC on the BP's LINT0. This is not
+	 * strictly necessary in pure symmetric-IO mode, but sometimes
+	 * we delegate interrupts to the 8259A.
+	 */
+	/*
+	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+	 */
+	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+	if (!smp_processor_id() && (pic_mode || !value)) {
+		value = APIC_DM_EXTINT;
+		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
+				smp_processor_id());
+	} else {
+		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
+				smp_processor_id());
+	}
+	apic_write(APIC_LVT0, value);
+
+	/*
+	 * only the BP should see the LINT1 NMI signal, obviously.
+	 */
+	if (!smp_processor_id())
+		value = APIC_DM_NMI;
+	else
+		value = APIC_DM_NMI | APIC_LVT_MASKED;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write(APIC_LVT1, value);
+
+	preempt_enable();
+}
+
+void __cpuinit end_local_APIC_setup(void)
+{
+	lapic_setup_esr();
+
+#ifdef CONFIG_X86_32
+	{
+		unsigned int value;
+		/* Disable the local apic timer */
+		value = apic_read(APIC_LVTT);
+		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, value);
+	}
+#endif
+
+	setup_apic_nmi_watchdog(NULL);
+	apic_pm_activate();
+}
+
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+#endif /* HAVE_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_physical_apicid = 0;
+	return 0;
+}
+#else
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+	u32 h, l, features;
+
+	/* Disabled by kernel option? */
+	if (disable_apic)
+		return -1;
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+		    (boot_cpu_data.x86 == 15))
+			break;
+		goto no_apic;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+			break;
+		goto no_apic;
+	default:
+		goto no_apic;
+	}
+
+	if (!cpu_has_apic) {
+		/*
+		 * Over-ride BIOS and try to enable the local APIC only if
+		 * "lapic" specified.
+		 */
+		if (!force_enable_local_apic) {
+			printk(KERN_INFO "Local APIC disabled by BIOS -- "
+			       "you can enable it with \"lapic\"\n");
+			return -1;
+		}
+		/*
+		 * Some BIOSes disable the local APIC in the APIC_BASE
+		 * MSR. This can only be done in software for Intel P6 or later
+		 * and AMD K7 (Model > 1) or later.
+		 */
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			printk(KERN_INFO
+			       "Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
+	}
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		printk(KERN_WARNING "Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	printk(KERN_INFO "Found and enabled local APIC!\n");
+
+	apic_pm_activate();
+
+	return 0;
+
+no_apic:
+	printk(KERN_INFO "No local APIC present or hardware disabled\n");
+	return -1;
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long phys_addr;
+
+	/*
+	 * If no local APIC can be found then go out
+	 * : it means there is no mpatable and MADT
+	 */
+	if (!smp_found_config)
+		return;
+
+	phys_addr = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+		    APIC_BASE, phys_addr);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+#ifdef HAVE_X2APIC
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+#endif
+
+	/*
+	 * If no local APIC can be found then set up a fake all
+	 * zeroes page to simulate the local APIC and another
+	 * one for the IO-APIC.
+	 */
+	if (!smp_found_config && detect_init_APIC()) {
+		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+		apic_phys = __pa(apic_phys);
+	} else
+		apic_phys = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+				APIC_BASE, apic_phys);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = read_apic_id();
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int apic_version[MAX_APICS];
+
+int __init APIC_init_uniprocessor(void)
+{
+#ifdef CONFIG_X86_64
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic &&
+	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		       boot_cpu_physical_apicid);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+		return -1;
+	}
+#endif
+
+#ifdef HAVE_X2APIC
+	enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+	setup_apic_routing();
+#endif
+
+	verify_local_APIC();
+	connect_bsp_APIC();
+
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
+	/*
+	 * Hack: In case of kdump, after a crash, kernel might be booting
+	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+	 * might be zero if read from MP tables. Get it from LAPIC.
+	 */
+# ifdef CONFIG_CRASH_DUMP
+	boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+	setup_local_APIC();
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+#endif
+		localise_nmi_watchdog();
+	end_local_APIC_setup();
+
+#ifdef CONFIG_X86_IO_APIC
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+# ifdef CONFIG_X86_64
+	else
+		nr_ioapics = 0;
+# endif
+#endif
+
+#ifdef CONFIG_X86_64
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+#else
+	setup_boot_clock();
+#endif
+
+	return 0;
+}
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_spurious_interrupt(void)
+#else
+void smp_spurious_interrupt(struct pt_regs *regs)
+#endif
+{
+	u32 v;
+
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+	/*
+	 * Check if this really is a spurious interrupt and ACK it
+	 * if it is a vectored one.  Just in case...
+	 * Spurious interrupts should not be ACKed.
+	 */
+	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+		ack_APIC_irq();
+
+#ifdef CONFIG_X86_64
+	add_pda(irq_spurious_count, 1);
+#else
+	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
+	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
+	       "should never happen.\n", smp_processor_id());
+	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
+	irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+#ifdef CONFIG_X86_64
+asmlinkage void smp_error_interrupt(void)
+#else
+void smp_error_interrupt(struct pt_regs *regs)
+#endif
+{
+	u32 v, v1;
+
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
+	irq_enter();
+	/* First tickle the hardware, only then report what went on. -- REW */
+	v = apic_read(APIC_ESR);
+	apic_write(APIC_ESR, 0);
+	v1 = apic_read(APIC_ESR);
+	ack_APIC_irq();
+	atomic_inc(&irq_err_count);
+
+	/* Here is what the APIC error bits mean:
+	   0: Send CS error
+	   1: Receive CS error
+	   2: Send accept error
+	   3: Receive accept error
+	   4: Reserved
+	   5: Send illegal vector
+	   6: Received illegal vector
+	   7: Illegal register address
+	*/
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+		smp_processor_id(), v , v1);
+	irq_exit();
+}
+
+/**
+ * connect_bsp_APIC - attach the APIC to the interrupt system
+ */
+void __init connect_bsp_APIC(void)
+{
+#ifdef CONFIG_X86_32
+	if (pic_mode) {
+		/*
+		 * Do not trust the local APIC being empty at bootup.
+		 */
+		clear_local_APIC();
+		/*
+		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
+		 * local APIC to INT and NMI lines.
+		 */
+		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+				"enabling APIC mode.\n");
+		outb(0x70, 0x22);
+		outb(0x01, 0x23);
+	}
+#endif
+	enable_apic_mode();
+}
+
+/**
+ * disconnect_bsp_APIC - detach the APIC from the interrupt system
+ * @virt_wire_setup:	indicates, whether virtual wire mode is selected
+ *
+ * Virtual wire mode is necessary to deliver legacy interrupts even when the
+ * APIC is disabled.
+ */
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+	unsigned int value;
+
+#ifdef CONFIG_X86_32
+	if (pic_mode) {
+		/*
+		 * Put the board back into PIC mode (has an effect only on
+		 * certain older boards).  Note that APIC interrupts, including
+		 * IPIs, won't work beyond this point!  The only exception are
+		 * INIT IPIs.
+		 */
+		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+				"entering PIC mode.\n");
+		outb(0x70, 0x22);
+		outb(0x00, 0x23);
+		return;
+	}
+#endif
+
+	/* Go back to Virtual Wire compatibility mode */
+
+	/* For the spurious interrupt use vector F, and enable it */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+	value |= 0xf;
+	apic_write(APIC_SPIV, value);
+
+	if (!virt_wire_setup) {
+		/*
+		 * For LVT0 make it edge triggered, active high,
+		 * external and enabled
+		 */
+		value = apic_read(APIC_LVT0);
+		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+		apic_write(APIC_LVT0, value);
+	} else {
+		/* Disable LVT0 */
+		apic_write(APIC_LVT0, APIC_LVT_MASKED);
+	}
+
+	/*
+	 * For LVT1 make it edge triggered, active high,
+	 * nmi and enabled
+	 */
+	value = apic_read(APIC_LVT1);
+	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+	apic_write(APIC_LVT1, value);
+}
+
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+	int cpu;
+	cpumask_t tmp_map;
+
+	/*
+	 * Validate version
+	 */
+	if (version == 0x0) {
+		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+				"fixing up to 0x10. (tell your hw vendor)\n",
+				version);
+		version = 0x10;
+	}
+	apic_version[apicid] = version;
+
+	if (num_processors >= NR_CPUS) {
+		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+			"  Processor ignored.\n", NR_CPUS);
+		return;
+	}
+
+	num_processors++;
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+
+	physid_set(apicid, phys_cpu_present_map);
+	if (apicid == boot_cpu_physical_apicid) {
+		/*
+		 * x86_bios_cpu_apicid is required to have processors listed
+		 * in same order as logical cpu numbers. Hence the first
+		 * entry is BSP, and so on.
+		 */
+		cpu = 0;
+	}
+	if (apicid > max_physical_apicid)
+		max_physical_apicid = apicid;
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+	 * but we need to work other dependencies like SMP_SUSPEND etc
+	 * before this can be done without some confusion.
+	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+	 *       - Ashok Raj <ashok.raj@intel.com>
+	 */
+	if (max_physical_apicid >= 8) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (!APIC_XAPIC(version)) {
+				def_to_bigsmp = 0;
+				break;
+			}
+			/* If P4 and above fall through */
+		case X86_VENDOR_AMD:
+			def_to_bigsmp = 1;
+		}
+	}
+#endif
+
+#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
+	/* are we being called early in kernel startup? */
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+
+		cpu_to_apicid[cpu] = apicid;
+		bios_cpu_apicid[cpu] = apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+	}
+#endif
+
+	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
+}
+
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+#endif
+
+/*
+ * Power management
+ */
+#ifdef CONFIG_PM
+
+static struct {
+	/*
+	 * 'active' is true if the local APIC was enabled by us and
+	 * not the BIOS; this signifies that we are also responsible
+	 * for disabling it before entering apm/acpi suspend
+	 */
+	int active;
+	/* r/w apic fields */
+	unsigned int apic_id;
+	unsigned int apic_taskpri;
+	unsigned int apic_ldr;
+	unsigned int apic_dfr;
+	unsigned int apic_spiv;
+	unsigned int apic_lvtt;
+	unsigned int apic_lvtpc;
+	unsigned int apic_lvt0;
+	unsigned int apic_lvt1;
+	unsigned int apic_lvterr;
+	unsigned int apic_tmict;
+	unsigned int apic_tdcr;
+	unsigned int apic_thmr;
+} apic_pm_state;
+
+static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+	unsigned long flags;
+	int maxlvt;
+
+	if (!apic_pm_state.active)
+		return 0;
+
+	maxlvt = lapic_get_maxlvt();
+
+	apic_pm_state.apic_id = apic_read(APIC_ID);
+	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
+
+	local_irq_save(flags);
+	disable_local_APIC();
+	local_irq_restore(flags);
+	return 0;
+}
+
+static int lapic_resume(struct sys_device *dev)
+{
+	unsigned int l, h;
+	unsigned long flags;
+	int maxlvt;
+
+	if (!apic_pm_state.active)
+		return 0;
+
+	maxlvt = lapic_get_maxlvt();
+
+	local_irq_save(flags);
+
+#ifdef HAVE_X2APIC
+	if (x2apic)
+		enable_x2apic();
+	else
+#endif
+	{
+		/*
+		 * Make sure the APICBASE points to the right address
+		 *
+		 * FIXME! This will be wrong if we ever support suspend on
+		 * SMP! We'll need to do this as part of the CPU restore!
+		 */
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	}
+
+	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+	apic_write(APIC_ID, apic_pm_state.apic_id);
+	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/*
+ * This device has no shutdown method - fully functioning local APICs
+ * are needed on every CPU up until machine_halt/restart/poweroff.
+ */
+
+static struct sysdev_class lapic_sysclass = {
+	.name		= "lapic",
+	.resume		= lapic_resume,
+	.suspend	= lapic_suspend,
+};
+
+static struct sys_device device_lapic = {
+	.id	= 0,
+	.cls	= &lapic_sysclass,
+};
+
+static void __cpuinit apic_pm_activate(void)
+{
+	apic_pm_state.active = 1;
+}
+
+static int __init init_lapic_sysfs(void)
+{
+	int error;
+
+	if (!cpu_has_apic)
+		return 0;
+	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
+	error = sysdev_class_register(&lapic_sysclass);
+	if (!error)
+		error = sysdev_register(&device_lapic);
+	return error;
+}
+device_initcall(init_lapic_sysfs);
+
+#else	/* CONFIG_PM */
+
+static void apic_pm_activate(void) { }
+
+#endif	/* CONFIG_PM */
+
+#ifdef CONFIG_X86_64
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+	int i, clusters, zeros;
+	unsigned id;
+	u16 *bios_cpu_apicid;
+	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+	/*
+	 * there is not this kind of box with AMD CPU yet.
+	 * Some AMD box with quadcore cpu and 8 sockets apicid
+	 * will be [4, 0x23] or [8, 0x27] could be thought to
+	 * vsmp box still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		/* are we being called early in kernel startup? */
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
+
+		if (id != BAD_APICID)
+			__set_bit(APIC_CLUSTERID(id), clustermap);
+	}
+
+	/* Problem:  Partially populated chassis may not have CPUs in some of
+	 * the APIC clusters they have been allocated.  Only present CPUs have
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
+	 */
+	clusters = 0;
+	zeros = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (test_bit(i, clustermap)) {
+			clusters += 1 + zeros;
+			zeros = 0;
+		} else
+			++zeros;
+	}
+
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
+	/*
+	 * If clusters > 2, then should be multi-chassis.
+	 * May have to revisit this when multi-core + hyperthreaded CPUs come
+	 * out, but AFAIK this will work even for them.
+	 */
+	return (clusters > 2);
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
+static int __init setup_disableapic(char *arg)
+{
+	disable_apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_APIC);
+	return 0;
+}
+early_param("disableapic", setup_disableapic);
+
+/* same as disableapic, for compatibility */
+static int __init setup_nolapic(char *arg)
+{
+	return setup_disableapic(arg);
+}
+early_param("nolapic", setup_nolapic);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+	local_apic_timer_c2_ok = 1;
+	return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init parse_disable_apic_timer(char *arg)
+{
+	disable_apic_timer = 1;
+	return 0;
+}
+early_param("noapictimer", parse_disable_apic_timer);
+
+static int __init parse_nolapic_timer(char *arg)
+{
+	disable_apic_timer = 1;
+	return 0;
+}
+early_param("nolapic_timer", parse_nolapic_timer);
+
+static int __init apic_set_verbosity(char *arg)
+{
+	if (!arg)  {
+#ifdef CONFIG_X86_64
+		skip_ioapic_setup = 0;
+		return 0;
+#endif
+		return -EINVAL;
+	}
+
+	if (strcmp("debug", arg) == 0)
+		apic_verbosity = APIC_DEBUG;
+	else if (strcmp("verbose", arg) == 0)
+		apic_verbosity = APIC_VERBOSE;
+	else {
+		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+			" use apic=verbose or apic=debug\n", arg);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+early_param("apic", apic_set_verbosity);
+
+static int __init lapic_insert_resource(void)
+{
+	if (!apic_phys)
+		return -1;
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
deleted file mode 100644
index 0b11d6e3f091..000000000000
--- a/arch/x86/kernel/apic_32.c
+++ /dev/null
@@ -1,2312 +0,0 @@
-/*
- *	Local APIC handling, local APIC timers
- *
- *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively.
- *	Maciej W. Rozycki	:	Various updates and fixes.
- *	Mikael Pettersson	:	Power Management for UP-APIC.
- *	Pavel Machek and
- *	Mikael Pettersson	:	PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/cpu.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/i8253.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-#include <mach_ipi.h>
-
-/*
- * Sanity check
- */
-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
-# error SPURIOUS_APIC_VECTOR definition error
-#endif
-
-#ifdef CONFIG_X86_32
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
-#endif
-
-#ifdef CONFIG_X86_64
-static int apic_calibrate_pmtmr __initdata;
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
-#ifdef CONFIG_X86_64
-#define HAVE_X2APIC
-#endif
-
-#ifdef HAVE_X2APIC
-int x2apic;
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
-unsigned long mp_lapic_addr;
-int disable_apic;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-int pic_mode;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-	.name = "Local APIC",
-	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-	.name		= "lapic",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-	.shift		= 32,
-	.set_mode	= lapic_timer_setup,
-	.set_next_event	= lapic_next_event,
-	.broadcast	= lapic_timer_broadcast,
-	.rating		= 100,
-	.irq		= -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-	return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-	return 1;
-#else
-	return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-	/* AMD systems use old APIC versions, so check the CPU */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 >= 0xf)
-		return 1;
-	return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-	u32 send_status;
-	int timeout;
-
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		udelay(100);
-	} while (timeout++ < 1000);
-
-	return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-	apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-	u32 icr1, icr2;
-
-	icr2 = apic_read(APIC_ICR2);
-	icr1 = apic_read(APIC_ICR);
-
-	return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-	.read = native_apic_mem_read,
-	.write = native_apic_mem_write,
-	.icr_read = xapic_icr_read,
-	.icr_write = xapic_icr_write,
-	.wait_icr_idle = xapic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-#ifdef HAVE_X2APIC
-static void x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-	unsigned long val;
-
-	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-	return val;
-}
-
-static struct apic_ops x2apic_ops = {
-	.read = native_apic_msr_read,
-	.write = native_apic_msr_write,
-	.icr_read = x2apic_icr_read,
-	.icr_write = x2apic_icr_write,
-	.wait_icr_idle = x2apic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-#endif
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-	unsigned int v;
-
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-
-	/* Level triggered for 82489DX (32bit mode) */
-	if (!lapic_is_integrated())
-		v |= APIC_LVT_LEVEL_TRIGGER;
-
-	apic_write(APIC_LVT0, v);
-}
-
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
-	return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
-	/*
-	 * - we always have APIC integrated on 64bit mode
-	 * - 82489DXs do not report # of LVT entries
-	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-	unsigned int lvtt_value, tmp_value;
-
-	lvtt_value = LOCAL_TIMER_VECTOR;
-	if (!oneshot)
-		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-	if (!lapic_is_integrated())
-		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-	if (!irqen)
-		lvtt_value |= APIC_LVT_MASKED;
-
-	apic_write(APIC_LVTT, lvtt_value);
-
-	/*
-	 * Divide PICLK by 16
-	 */
-	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR,
-		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-		APIC_TDR_DIV_16);
-
-	if (!oneshot)
-		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-	apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt)
-{
-	apic_write(APIC_TMICT, delta);
-	return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt)
-{
-	unsigned long flags;
-	unsigned int v;
-
-	/* Lapic used as dummy for broadcast ? */
-	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-		return;
-
-	local_irq_save(flags);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
-				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		v = apic_read(APIC_LVTT);
-		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do here */
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void __cpuinit setup_APIC_timer(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-	clockevents_register_device(levt);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#else
-/*
- * In this functions we calibrate APIC bus clocks to the external timer.
- *
- * We want to do the calibration only once since we want to have local timer
- * irqs syncron. CPUs connected by the same APIC bus have the very same bus
- * frequency.
- *
- * This was previously done by reading the PIT/HPET and waiting for a wrap
- * around to find out, that a tick has elapsed. I have a box, where the PIT
- * readout is broken, so it never gets out of the wait loop again. This was
- * also reported by others.
- *
- * Monitoring the jiffies value is inaccurate and the clockevents
- * infrastructure allows us to do a simple substitution of the interrupt
- * handler.
- *
- * The calibration routine also uses the pm_timer when possible, as the PIT
- * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
- * back to normal later in the boot process).
- */
-
-#define LAPIC_CAL_LOOPS		(HZ/10)
-
-static __initdata int lapic_cal_loops = -1;
-static __initdata long lapic_cal_t1, lapic_cal_t2;
-static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
-static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
-
-/*
- * Temporary interrupt handler.
- */
-static void __init lapic_cal_handler(struct clock_event_device *dev)
-{
-	unsigned long long tsc = 0;
-	long tapic = apic_read(APIC_TMCCT);
-	unsigned long pm = acpi_pm_read_early();
-
-	if (cpu_has_tsc)
-		rdtscll(tsc);
-
-	switch (lapic_cal_loops++) {
-	case 0:
-		lapic_cal_t1 = tapic;
-		lapic_cal_tsc1 = tsc;
-		lapic_cal_pm1 = pm;
-		lapic_cal_j1 = jiffies;
-		break;
-
-	case LAPIC_CAL_LOOPS:
-		lapic_cal_t2 = tapic;
-		lapic_cal_tsc2 = tsc;
-		if (pm < lapic_cal_pm1)
-			pm += ACPI_PM_OVRRUN;
-		lapic_cal_pm2 = pm;
-		lapic_cal_j2 = jiffies;
-		break;
-	}
-}
-
-static int __init calibrate_APIC_clock(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
-	void (*real_handler)(struct clock_event_device *dev);
-	unsigned long deltaj;
-	long delta, deltapm;
-	int pm_referenced = 0;
-
-	local_irq_disable();
-
-	/* Replace the global interrupt handler */
-	real_handler = global_clock_event->event_handler;
-	global_clock_event->event_handler = lapic_cal_handler;
-
-	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
-	 * can underflow in the 100ms detection time frame
-	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
-
-	/* Let the interrupts run */
-	local_irq_enable();
-
-	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-		cpu_relax();
-
-	local_irq_disable();
-
-	/* Restore the real event handler */
-	global_clock_event->event_handler = real_handler;
-
-	/* Build delta t1-t2 as apic timer counts down */
-	delta = lapic_cal_t1 - lapic_cal_t2;
-	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
-
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
-
-	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
-	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    calibration_result);
-
-	if (cpu_has_tsc) {
-		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
-		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-			    "%ld.%04ld MHz.\n",
-			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
-	}
-
-	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-		    "%u.%04u MHz.\n",
-		    calibration_result / (1000000 / HZ),
-		    calibration_result % (1000000 / HZ));
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		local_irq_enable();
-		printk(KERN_WARNING
-		       "APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
-
-	/* We trust the pm timer based calibration */
-	if (!pm_referenced) {
-		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
-		/*
-		 * Setup the apic timer manually
-		 */
-		levt->event_handler = lapic_cal_handler;
-		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
-		lapic_cal_loops = -1;
-
-		/* Let the interrupts run */
-		local_irq_enable();
-
-		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-			cpu_relax();
-
-		local_irq_disable();
-
-		/* Stop the lapic timer */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
-
-		local_irq_enable();
-
-		/* Jiffies delta */
-		deltaj = lapic_cal_j2 - lapic_cal_j1;
-		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
-
-		/* Check, if the jiffies result is consistent */
-		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
-		else
-			levt->features |= CLOCK_EVT_FEAT_DUMMY;
-	} else
-		local_irq_enable();
-
-	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		printk(KERN_WARNING
-		       "APIC timer disabled due to verification failure.\n");
-			return -1;
-	}
-
-	return 0;
-}
-
-#endif
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-	/*
-	 * The local apic timer can be disabled via the kernel
-	 * commandline or from the CPU detection code. Register the lapic
-	 * timer as a dummy clock event source on SMP systems, so the
-	 * broadcast mechanism is used. On UP systems simply ignore it.
-	 */
-	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1) {
-			lapic_clockevent.mult = 1;
-			setup_APIC_timer();
-		}
-		return;
-	}
-
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
-
-	if (calibrate_APIC_clock()) {
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
-			setup_APIC_timer();
-		return;
-	}
-
-	/*
-	 * If nmi_watchdog is set to IO_APIC, we need the
-	 * PIT/HPET going.  Otherwise register lapic as a dummy
-	 * device.
-	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-	/* Setup the lapic or request the broadcast */
-	setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-	setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-	/*
-	 * Normally we should not be here till LAPIC has been initialized but
-	 * in some cases like kdump, its possible that there is a pending LAPIC
-	 * timer interrupt from previous kernel's context and is delivered in
-	 * new kernel the moment interrupts are enabled.
-	 *
-	 * Interrupts are enabled early and LAPIC is setup much later, hence
-	 * its possible that when we get here evt->event_handler is NULL.
-	 * Check for event_handler being NULL and discard the interrupt as
-	 * spurious.
-	 */
-	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-		/* Switch it off */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-		return;
-	}
-
-	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-#ifdef CONFIG_X86_64
-	add_pda(apic_timer_irqs, 1);
-#else
-	per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-	evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 */
-	ack_APIC_irq();
-	/*
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	local_apic_timer_interrupt();
-	irq_exit();
-
-	set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-	int maxlvt;
-	u32 v;
-
-	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
-		return;
-
-	maxlvt = lapic_get_maxlvt();
-	/*
-	 * Masking an LVT entry can trigger a local APIC error
-	 * if the vector is zero. Mask LVTERR first to prevent this.
-	 */
-	if (maxlvt >= 3) {
-		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-	}
-	/*
-	 * Careful: we have to set masks only first to deassert
-	 * any level-triggered sources.
-	 */
-	v = apic_read(APIC_LVTT);
-	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT1);
-	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-	if (maxlvt >= 4) {
-		v = apic_read(APIC_LVTPC);
-		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-	}
-
-	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
-	if (maxlvt >= 5) {
-		v = apic_read(APIC_LVTTHMR);
-		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-	}
-#endif
-	/*
-	 * Clean APIC state for other OSs:
-	 */
-	apic_write(APIC_LVTT, APIC_LVT_MASKED);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	apic_write(APIC_LVT1, APIC_LVT_MASKED);
-	if (maxlvt >= 3)
-		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-	/* Integrated APIC (!82489DX) ? */
-	if (lapic_is_integrated()) {
-		if (maxlvt > 3)
-			/* Clear ESR due to Pentium errata 3AP and 11AP */
-			apic_write(APIC_ESR, 0);
-		apic_read(APIC_ESR);
-	}
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-	unsigned int value;
-
-	clear_local_APIC();
-
-	/*
-	 * Disable APIC (implies clearing of registers
-	 * for 82489DX!).
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_SPIV_APIC_ENABLED;
-	apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
-	 * restore the disabled state.
-	 */
-	if (enabled_via_apicbase) {
-		unsigned int l, h;
-
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_ENABLE;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-	unsigned long flags;
-
-	if (!cpu_has_apic)
-		return;
-
-	local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-	if (!enabled_via_apicbase)
-		clear_local_APIC();
-	else
-#endif
-		disable_local_APIC();
-
-
-	local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ APIC_ID_MASK))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-	/*
-	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
-	 * needed on AMD.
-	 */
-	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-	unsigned int value;
-
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !cpu_has_apic)
-		return;
-
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
-
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
-#endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-	unsigned int value;
-	int i, j;
-
-#ifdef CONFIG_X86_32
-	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-	}
-#endif
-
-	preempt_disable();
-
-	/*
-	 * Double-check whether this APIC is really registered.
-	 * This is meaningless in clustered apic mode, so we skip it.
-	 */
-	if (!apic_id_registered())
-		BUG();
-
-	/*
-	 * Intel recommends to set DFR, LDR and TPR before enabling
-	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-	 * document number 292116).  So here it goes...
-	 */
-	init_apic_ldr();
-
-	/*
-	 * Set Task Priority to 'accept all'. We never change this
-	 * later on.
-	 */
-	value = apic_read(APIC_TASKPRI);
-	value &= ~APIC_TPRI_MASK;
-	apic_write(APIC_TASKPRI, value);
-
-	/*
-	 * After a crash, we no longer service the interrupts and a pending
-	 * interrupt from previous kernel might still have ISR bit set.
-	 *
-	 * Most probably by now CPU has serviced that pending interrupt and
-	 * it might not have done the ack_APIC_irq() because it thought,
-	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-	 * does not clear the ISR bit and cpu thinks it has already serivced
-	 * the interrupt. Hence a vector might get locked. It was noticed
-	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
-	 */
-	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-		value = apic_read(APIC_ISR + i*0x10);
-		for (j = 31; j >= 0; j--) {
-			if (value & (1<<j))
-				ack_APIC_irq();
-		}
-	}
-
-	/*
-	 * Now that we are all set up, enable the APIC
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	/*
-	 * Enable APIC
-	 */
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
-	 * certain networking cards. If high frequency interrupts are
-	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
-	 * entry is masked/unmasked at a high rate as well then sooner or
-	 * later IOAPIC line gets 'stuck', no more interrupts are received
-	 * from the device. If focus CPU is disabled then the hang goes
-	 * away, oh well :-(
-	 *
-	 * [ This bug can be reproduced easily with a level-triggered
-	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
-	 *   BX chipset. ]
-	 */
-	/*
-	 * Actually disabling the focus CPU check just makes the hang less
-	 * frequent as it makes the interrupt distributon model be more
-	 * like LRU than MRU (the short-term load is more even across CPUs).
-	 * See also the comment in end_level_ioapic_irq().  --macro
-	 */
-
-	/*
-	 * - enable focus processor (bit==0)
-	 * - 64bit mode always use processor focus
-	 *   so no need to set it
-	 */
-	value &= ~APIC_SPIV_FOCUS_DISABLED;
-#endif
-
-	/*
-	 * Set spurious IRQ vector
-	 */
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up LVT0, LVT1:
-	 *
-	 * set up through-local-APIC on the BP's LINT0. This is not
-	 * strictly necessary in pure symmetric-IO mode, but sometimes
-	 * we delegate interrupts to the 8259A.
-	 */
-	/*
-	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-	 */
-	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && (pic_mode || !value)) {
-		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-				smp_processor_id());
-	} else {
-		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-				smp_processor_id());
-	}
-	apic_write(APIC_LVT0, value);
-
-	/*
-	 * only the BP should see the LINT1 NMI signal, obviously.
-	 */
-	if (!smp_processor_id())
-		value = APIC_DM_NMI;
-	else
-		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-
-	preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-	lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int value;
-		/* Disable the local apic timer */
-		value = apic_read(APIC_LVTT);
-		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, value);
-	}
-#endif
-
-	setup_apic_nmi_watchdog(NULL);
-	apic_pm_activate();
-}
-
-#ifdef HAVE_X2APIC
-void check_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-	if (msr & X2APIC_ENABLE) {
-		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
-		apic_ops = &x2apic_ops;
-	}
-}
-
-void enable_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-	if (!(msr & X2APIC_ENABLE)) {
-		printk("Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-	}
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	int ret;
-	unsigned long flags;
-
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of nox2apic\n");
-		return;
-	}
-
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of skipping io-apic setup\n");
-		return;
-	}
-
-	ret = dmar_table_init();
-	if (ret) {
-		printk(KERN_INFO
-		       "dmar_table_init() failed with %d:\n", ret);
-
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			printk(KERN_INFO
-			       "Not enabling x2apic,Intr-remapping\n");
-		return;
-	}
-
-	local_irq_save(flags);
-	mask_8259A();
-	save_mask_IO_APIC_setup();
-
-	ret = enable_intr_remapping(1);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
-
-	if (ret)
-		goto end;
-
-	if (!x2apic) {
-		x2apic = 1;
-		apic_ops = &x2apic_ops;
-		enable_x2apic();
-	}
-end:
-	if (ret)
-		/*
-		 * IR enabling failed
-		 */
-		restore_IO_APIC_setup();
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-	unmask_8259A();
-	local_irq_restore(flags);
-
-	if (!ret) {
-		if (!x2apic_preenabled)
-			printk(KERN_INFO
-			       "Enabled x2apic and interrupt-remapping\n");
-		else
-			printk(KERN_INFO
-			       "Enabled Interrupt-remapping\n");
-	} else
-		printk(KERN_ERR
-		       "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-	if (!cpu_has_x2apic)
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-	       " and x2apic\n");
-#endif
-
-	return;
-}
-#endif /* HAVE_X2APIC */
-
-#ifdef CONFIG_X86_64
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
-		return -1;
-	}
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
-	return 0;
-}
-#else
-/*
- * Detect and initialize APIC
- */
-static int __init detect_init_APIC(void)
-{
-	u32 h, l, features;
-
-	/* Disabled by kernel option? */
-	if (disable_apic)
-		return -1;
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-		    (boot_cpu_data.x86 == 15))
-			break;
-		goto no_apic;
-	case X86_VENDOR_INTEL:
-		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
-			break;
-		goto no_apic;
-	default:
-		goto no_apic;
-	}
-
-	if (!cpu_has_apic) {
-		/*
-		 * Over-ride BIOS and try to enable the local APIC only if
-		 * "lapic" specified.
-		 */
-		if (!force_enable_local_apic) {
-			printk(KERN_INFO "Local APIC disabled by BIOS -- "
-			       "you can enable it with \"lapic\"\n");
-			return -1;
-		}
-		/*
-		 * Some BIOSes disable the local APIC in the APIC_BASE
-		 * MSR. This can only be done in software for Intel P6 or later
-		 * and AMD K7 (Model > 1) or later.
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			printk(KERN_INFO
-			       "Local APIC disabled by BIOS -- reenabling.\n");
-			l &= ~MSR_IA32_APICBASE_BASE;
-			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-			wrmsr(MSR_IA32_APICBASE, l, h);
-			enabled_via_apicbase = 1;
-		}
-	}
-	/*
-	 * The APIC feature bit should now be enabled
-	 * in `cpuid'
-	 */
-	features = cpuid_edx(1);
-	if (!(features & (1 << X86_FEATURE_APIC))) {
-		printk(KERN_WARNING "Could not enable APIC!\n");
-		return -1;
-	}
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-	printk(KERN_INFO "Found and enabled local APIC!\n");
-
-	apic_pm_activate();
-
-	return 0;
-
-no_apic:
-	printk(KERN_INFO "No local APIC present or hardware disabled\n");
-	return -1;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-	unsigned long phys_addr;
-
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	phys_addr = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, phys_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-#ifdef HAVE_X2APIC
-	if (x2apic) {
-		boot_cpu_physical_apicid = read_apic_id();
-		return;
-	}
-#endif
-
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
-	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
-		apic_phys = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-				APIC_BASE, apic_phys);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-#ifdef CONFIG_X86_64
-	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
-		return -1;
-	}
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !cpu_has_apic)
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-		       boot_cpu_physical_apicid);
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-		return -1;
-	}
-#endif
-
-#ifdef HAVE_X2APIC
-	enable_IR_x2apic();
-#endif
-#ifdef CONFIG_X86_64
-	setup_apic_routing();
-#endif
-
-	verify_local_APIC();
-	connect_bsp_APIC();
-
-#ifdef CONFIG_X86_64
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
-	/*
-	 * Hack: In case of kdump, after a crash, kernel might be booting
-	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
-	 * might be zero if read from MP tables. Get it from LAPIC.
-	 */
-# ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	setup_local_APIC();
-
-#ifdef CONFIG_X86_64
-	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling vector on BP
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
-	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-#endif
-		localise_nmi_watchdog();
-	end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-# ifdef CONFIG_X86_64
-	else
-		nr_ioapics = 0;
-# endif
-#endif
-
-#ifdef CONFIG_X86_64
-	setup_boot_APIC_clock();
-	check_nmi_watchdog();
-#else
-	setup_boot_clock();
-#endif
-
-	return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_spurious_interrupt(void)
-#else
-void smp_spurious_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/*
-	 * Check if this really is a spurious interrupt and ACK it
-	 * if it is a vectored one.  Just in case...
-	 * Spurious interrupts should not be ACKed.
-	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
-
-#ifdef CONFIG_X86_64
-	add_pda(irq_spurious_count, 1);
-#else
-	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
-	       "should never happen.\n", smp_processor_id());
-	__get_cpu_var(irq_stat).irq_spurious_count++;
-#endif
-	irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_error_interrupt(void)
-#else
-void smp_error_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v, v1;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
-	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
-	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
-
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
-	irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Do not trust the local APIC being empty at bootup.
-		 */
-		clear_local_APIC();
-		/*
-		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-		 * local APIC to INT and NMI lines.
-		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
-	}
-#endif
-	enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:	indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	unsigned int value;
-
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Put the board back into PIC mode (has an effect only on
-		 * certain older boards).  Note that APIC interrupts, including
-		 * IPIs, won't work beyond this point!  The only exception are
-		 * INIT IPIs.
-		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
-		return;
-	}
-#endif
-
-	/* Go back to Virtual Wire compatibility mode */
-
-	/* For the spurious interrupt use vector F, and enable it */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-	value |= 0xf;
-	apic_write(APIC_SPIV, value);
-
-	if (!virt_wire_setup) {
-		/*
-		 * For LVT0 make it edge triggered, active high,
-		 * external and enabled
-		 */
-		value = apic_read(APIC_LVT0);
-		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
-	} else {
-		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	}
-
-	/*
-	 * For LVT1 make it edge triggered, active high,
-	 * nmi and enabled
-	 */
-	value = apic_read(APIC_LVT1);
-	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-	int cpu;
-	cpumask_t tmp_map;
-
-	/*
-	 * Validate version
-	 */
-	if (version == 0x0) {
-		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-				"fixing up to 0x10. (tell your hw vendor)\n",
-				version);
-		version = 0x10;
-	}
-	apic_version[apicid] = version;
-
-	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
-		return;
-	}
-
-	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-
-	physid_set(apicid, phys_cpu_present_map);
-	if (apicid == boot_cpu_physical_apicid) {
-		/*
-		 * x86_bios_cpu_apicid is required to have processors listed
-		 * in same order as logical cpu numbers. Hence the first
-		 * entry is BSP, and so on.
-		 */
-		cpu = 0;
-	}
-	if (apicid > max_physical_apicid)
-		max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-	 * but we need to work other dependencies like SMP_SUSPEND etc
-	 * before this can be done without some confusion.
-	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-	 *       - Ashok Raj <ashok.raj@intel.com>
-	 */
-	if (max_physical_apicid >= 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-	/* are we being called early in kernel startup? */
-	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-		cpu_to_apicid[cpu] = apicid;
-		bios_cpu_apicid[cpu] = apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-	}
-#endif
-
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
-}
-
-#ifdef CONFIG_X86_64
-int hard_smp_processor_id(void)
-{
-	return read_apic_id();
-}
-#endif
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-	/*
-	 * 'active' is true if the local APIC was enabled by us and
-	 * not the BIOS; this signifies that we are also responsible
-	 * for disabling it before entering apm/acpi suspend
-	 */
-	int active;
-	/* r/w apic fields */
-	unsigned int apic_id;
-	unsigned int apic_taskpri;
-	unsigned int apic_ldr;
-	unsigned int apic_dfr;
-	unsigned int apic_spiv;
-	unsigned int apic_lvtt;
-	unsigned int apic_lvtpc;
-	unsigned int apic_lvt0;
-	unsigned int apic_lvt1;
-	unsigned int apic_lvterr;
-	unsigned int apic_tmict;
-	unsigned int apic_tdcr;
-	unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	apic_pm_state.apic_id = apic_read(APIC_ID);
-	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	if (maxlvt >= 4)
-		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-	local_irq_save(flags);
-	disable_local_APIC();
-	local_irq_restore(flags);
-	return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-	unsigned int l, h;
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	local_irq_save(flags);
-
-#ifdef HAVE_X2APIC
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
-	{
-		/*
-		 * Make sure the APICBASE points to the right address
-		 *
-		 * FIXME! This will be wrong if we ever support suspend on
-		 * SMP! We'll need to do this as part of the CPU restore!
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
-	.resume		= lapic_resume,
-	.suspend	= lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-	apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
-	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else	/* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif	/* CONFIG_PM */
-
-#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-	int i, clusters, zeros;
-	unsigned id;
-	u16 *bios_cpu_apicid;
-	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
-	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-	for (i = 0; i < NR_CPUS; i++) {
-		/* are we being called early in kernel startup? */
-		if (bios_cpu_apicid) {
-			id = bios_cpu_apicid[i];
-		}
-		else if (i < nr_cpu_ids) {
-			if (cpu_present(i))
-				id = per_cpu(x86_bios_cpu_apicid, i);
-			else
-				continue;
-		}
-		else
-			break;
-
-		if (id != BAD_APICID)
-			__set_bit(APIC_CLUSTERID(id), clustermap);
-	}
-
-	/* Problem:  Partially populated chassis may not have CPUs in some of
-	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-	 * Since clusters are allocated sequentially, count zeros only if
-	 * they are bounded by ones.
-	 */
-	clusters = 0;
-	zeros = 0;
-	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-		if (test_bit(i, clustermap)) {
-			clusters += 1 + zeros;
-			zeros = 0;
-		} else
-			++zeros;
-	}
-
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
-		return 1;
-
-	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
-	 */
-	return (clusters > 2);
-}
-#endif
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-	disable_apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_APIC);
-	return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-	return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-	local_apic_timer_c2_ok = 1;
-	return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-	if (!arg)  {
-#ifdef CONFIG_X86_64
-		skip_ioapic_setup = 0;
-		ioapic_force = 1;
-		return 0;
-#endif
-		return -EINVAL;
-	}
-
-	if (strcmp("debug", arg) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", arg) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-			" use apic=verbose or apic=debug\n", arg);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-	if (!apic_phys)
-		return -1;
-
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
-	return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 6e99af5ce678..000000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,2311 +0,0 @@
-/*
- *	Local APIC handling, local APIC timers
- *
- *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
- *					thanks to Eric Gilmore
- *					and Rolf G. Tews
- *					for testing these extensively.
- *	Maciej W. Rozycki	:	Various updates and fixes.
- *	Mikael Pettersson	:	Power Management for UP-APIC.
- *	Pavel Machek and
- *	Mikael Pettersson	:	PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/cpu.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/i8253.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-#include <mach_ipi.h>
-
-/*
- * Sanity check
- */
-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
-# error SPURIOUS_APIC_VECTOR definition error
-#endif
-
-#ifdef CONFIG_X86_32
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-	force_enable_local_apic = 1;
-	return 0;
-}
-early_param("lapic", parse_lapic);
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
-#endif
-
-#ifdef CONFIG_X86_64
-static int apic_calibrate_pmtmr __initdata;
-static __init int setup_apicpmtimer(char *s)
-{
-	apic_calibrate_pmtmr = 1;
-	notsc_setup(NULL);
-	return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
-#ifdef CONFIG_X86_64
-#define HAVE_X2APIC
-#endif
-
-#ifdef HAVE_X2APIC
-int x2apic;
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
-static __init int setup_nox2apic(char *str)
-{
-	disable_x2apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
-unsigned long mp_lapic_addr;
-int disable_apic;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-int pic_mode;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-	.name = "Local APIC",
-	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-	.name		= "lapic",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-	.shift		= 32,
-	.set_mode	= lapic_timer_setup,
-	.set_next_event	= lapic_next_event,
-	.broadcast	= lapic_timer_broadcast,
-	.rating		= 100,
-	.irq		= -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-	return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-	return 1;
-#else
-	return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-	/* AMD systems use old APIC versions, so check the CPU */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 >= 0xf)
-		return 1;
-	return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-	u32 send_status;
-	int timeout;
-
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		udelay(100);
-	} while (timeout++ < 1000);
-
-	return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-	apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-	u32 icr1, icr2;
-
-	icr2 = apic_read(APIC_ICR2);
-	icr1 = apic_read(APIC_ICR);
-
-	return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-	.read = native_apic_mem_read,
-	.write = native_apic_mem_write,
-	.icr_read = xapic_icr_read,
-	.icr_write = xapic_icr_write,
-	.wait_icr_idle = xapic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-#ifdef HAVE_X2APIC
-static void x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-	/* no need to wait for icr idle in x2apic */
-	return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-	unsigned long val;
-
-	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-	return val;
-}
-
-static struct apic_ops x2apic_ops = {
-	.read = native_apic_msr_read,
-	.write = native_apic_msr_write,
-	.icr_read = x2apic_icr_read,
-	.icr_write = x2apic_icr_write,
-	.wait_icr_idle = x2apic_wait_icr_idle,
-	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-#endif
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-	unsigned int v;
-
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-
-	/* Level triggered for 82489DX (32bit mode) */
-	if (!lapic_is_integrated())
-		v |= APIC_LVT_LEVEL_TRIGGER;
-
-	apic_write(APIC_LVT0, v);
-}
-
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
-	return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
-	/*
-	 * - we always have APIC integrated on 64bit mode
-	 * - 82489DXs do not report # of LVT entries
-	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-	unsigned int lvtt_value, tmp_value;
-
-	lvtt_value = LOCAL_TIMER_VECTOR;
-	if (!oneshot)
-		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-	if (!lapic_is_integrated())
-		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-	if (!irqen)
-		lvtt_value |= APIC_LVT_MASKED;
-
-	apic_write(APIC_LVTT, lvtt_value);
-
-	/*
-	 * Divide PICLK by 16
-	 */
-	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR,
-		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-		APIC_TDR_DIV_16);
-
-	if (!oneshot)
-		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-	apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt)
-{
-	apic_write(APIC_TMICT, delta);
-	return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt)
-{
-	unsigned long flags;
-	unsigned int v;
-
-	/* Lapic used as dummy for broadcast ? */
-	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-		return;
-
-	local_irq_save(flags);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
-				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		v = apic_read(APIC_LVTT);
-		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do here */
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void __cpuinit setup_APIC_timer(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-	clockevents_register_device(levt);
-}
-
-#ifdef CONFIG_X86_64
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#else
-/*
- * In this functions we calibrate APIC bus clocks to the external timer.
- *
- * We want to do the calibration only once since we want to have local timer
- * irqs syncron. CPUs connected by the same APIC bus have the very same bus
- * frequency.
- *
- * This was previously done by reading the PIT/HPET and waiting for a wrap
- * around to find out, that a tick has elapsed. I have a box, where the PIT
- * readout is broken, so it never gets out of the wait loop again. This was
- * also reported by others.
- *
- * Monitoring the jiffies value is inaccurate and the clockevents
- * infrastructure allows us to do a simple substitution of the interrupt
- * handler.
- *
- * The calibration routine also uses the pm_timer when possible, as the PIT
- * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
- * back to normal later in the boot process).
- */
-
-#define LAPIC_CAL_LOOPS		(HZ/10)
-
-static __initdata int lapic_cal_loops = -1;
-static __initdata long lapic_cal_t1, lapic_cal_t2;
-static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
-static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
-
-/*
- * Temporary interrupt handler.
- */
-static void __init lapic_cal_handler(struct clock_event_device *dev)
-{
-	unsigned long long tsc = 0;
-	long tapic = apic_read(APIC_TMCCT);
-	unsigned long pm = acpi_pm_read_early();
-
-	if (cpu_has_tsc)
-		rdtscll(tsc);
-
-	switch (lapic_cal_loops++) {
-	case 0:
-		lapic_cal_t1 = tapic;
-		lapic_cal_tsc1 = tsc;
-		lapic_cal_pm1 = pm;
-		lapic_cal_j1 = jiffies;
-		break;
-
-	case LAPIC_CAL_LOOPS:
-		lapic_cal_t2 = tapic;
-		lapic_cal_tsc2 = tsc;
-		if (pm < lapic_cal_pm1)
-			pm += ACPI_PM_OVRRUN;
-		lapic_cal_pm2 = pm;
-		lapic_cal_j2 = jiffies;
-		break;
-	}
-}
-
-static int __init calibrate_APIC_clock(void)
-{
-	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
-	void (*real_handler)(struct clock_event_device *dev);
-	unsigned long deltaj;
-	long delta, deltapm;
-	int pm_referenced = 0;
-
-	local_irq_disable();
-
-	/* Replace the global interrupt handler */
-	real_handler = global_clock_event->event_handler;
-	global_clock_event->event_handler = lapic_cal_handler;
-
-	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
-	 * can underflow in the 100ms detection time frame
-	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
-
-	/* Let the interrupts run */
-	local_irq_enable();
-
-	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-		cpu_relax();
-
-	local_irq_disable();
-
-	/* Restore the real event handler */
-	global_clock_event->event_handler = real_handler;
-
-	/* Build delta t1-t2 as apic timer counts down */
-	delta = lapic_cal_t1 - lapic_cal_t2;
-	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
-
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
-
-	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
-	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    calibration_result);
-
-	if (cpu_has_tsc) {
-		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
-		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-			    "%ld.%04ld MHz.\n",
-			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
-	}
-
-	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-		    "%u.%04u MHz.\n",
-		    calibration_result / (1000000 / HZ),
-		    calibration_result % (1000000 / HZ));
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		local_irq_enable();
-		printk(KERN_WARNING
-		       "APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
-
-	/* We trust the pm timer based calibration */
-	if (!pm_referenced) {
-		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
-		/*
-		 * Setup the apic timer manually
-		 */
-		levt->event_handler = lapic_cal_handler;
-		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
-		lapic_cal_loops = -1;
-
-		/* Let the interrupts run */
-		local_irq_enable();
-
-		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-			cpu_relax();
-
-		local_irq_disable();
-
-		/* Stop the lapic timer */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
-
-		local_irq_enable();
-
-		/* Jiffies delta */
-		deltaj = lapic_cal_j2 - lapic_cal_j1;
-		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
-
-		/* Check, if the jiffies result is consistent */
-		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
-		else
-			levt->features |= CLOCK_EVT_FEAT_DUMMY;
-	} else
-		local_irq_enable();
-
-	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		printk(KERN_WARNING
-		       "APIC timer disabled due to verification failure.\n");
-			return -1;
-	}
-
-	return 0;
-}
-
-#endif
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-	/*
-	 * The local apic timer can be disabled via the kernel
-	 * commandline or from the CPU detection code. Register the lapic
-	 * timer as a dummy clock event source on SMP systems, so the
-	 * broadcast mechanism is used. On UP systems simply ignore it.
-	 */
-	if (disable_apic_timer) {
-		printk(KERN_INFO "Disabling APIC timer\n");
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1) {
-			lapic_clockevent.mult = 1;
-			setup_APIC_timer();
-		}
-		return;
-	}
-
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
-
-	if (calibrate_APIC_clock()) {
-		/* No broadcast on UP ! */
-		if (num_possible_cpus() > 1)
-			setup_APIC_timer();
-		return;
-	}
-
-	/*
-	 * If nmi_watchdog is set to IO_APIC, we need the
-	 * PIT/HPET going.  Otherwise register lapic as a dummy
-	 * device.
-	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		printk(KERN_WARNING "APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-	/* Setup the lapic or request the broadcast */
-	setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-	setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-	/*
-	 * Normally we should not be here till LAPIC has been initialized but
-	 * in some cases like kdump, its possible that there is a pending LAPIC
-	 * timer interrupt from previous kernel's context and is delivered in
-	 * new kernel the moment interrupts are enabled.
-	 *
-	 * Interrupts are enabled early and LAPIC is setup much later, hence
-	 * its possible that when we get here evt->event_handler is NULL.
-	 * Check for event_handler being NULL and discard the interrupt as
-	 * spurious.
-	 */
-	if (!evt->event_handler) {
-		printk(KERN_WARNING
-		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-		/* Switch it off */
-		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-		return;
-	}
-
-	/*
-	 * the NMI deadlock-detector uses this.
-	 */
-#ifdef CONFIG_X86_64
-	add_pda(apic_timer_irqs, 1);
-#else
-	per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-	evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 */
-	ack_APIC_irq();
-	/*
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	local_apic_timer_interrupt();
-	irq_exit();
-
-	set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-	int maxlvt;
-	u32 v;
-
-	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
-		return;
-
-	maxlvt = lapic_get_maxlvt();
-	/*
-	 * Masking an LVT entry can trigger a local APIC error
-	 * if the vector is zero. Mask LVTERR first to prevent this.
-	 */
-	if (maxlvt >= 3) {
-		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-	}
-	/*
-	 * Careful: we have to set masks only first to deassert
-	 * any level-triggered sources.
-	 */
-	v = apic_read(APIC_LVTT);
-	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-	v = apic_read(APIC_LVT1);
-	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-	if (maxlvt >= 4) {
-		v = apic_read(APIC_LVTPC);
-		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-	}
-
-	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
-	if (maxlvt >= 5) {
-		v = apic_read(APIC_LVTTHMR);
-		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-	}
-#endif
-	/*
-	 * Clean APIC state for other OSs:
-	 */
-	apic_write(APIC_LVTT, APIC_LVT_MASKED);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	apic_write(APIC_LVT1, APIC_LVT_MASKED);
-	if (maxlvt >= 3)
-		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-	/* Integrated APIC (!82489DX) ? */
-	if (lapic_is_integrated()) {
-		if (maxlvt > 3)
-			/* Clear ESR due to Pentium errata 3AP and 11AP */
-			apic_write(APIC_ESR, 0);
-		apic_read(APIC_ESR);
-	}
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-	unsigned int value;
-
-	clear_local_APIC();
-
-	/*
-	 * Disable APIC (implies clearing of registers
-	 * for 82489DX!).
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_SPIV_APIC_ENABLED;
-	apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
-	 * restore the disabled state.
-	 */
-	if (enabled_via_apicbase) {
-		unsigned int l, h;
-
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_ENABLE;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-	unsigned long flags;
-
-	if (!cpu_has_apic)
-		return;
-
-	local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-	if (!enabled_via_apicbase)
-		clear_local_APIC();
-	else
-#endif
-		disable_local_APIC();
-
-
-	local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ APIC_ID_MASK))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-	/*
-	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
-	 * needed on AMD.
-	 */
-	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return;
-
-	/*
-	 * Wait for idle.
-	 */
-	apic_wait_icr_idle();
-
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-	unsigned int value;
-
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !cpu_has_apic)
-		return;
-
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
-
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
-#endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-	unsigned int value;
-	int i, j;
-
-#ifdef CONFIG_X86_32
-	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-		apic_write(APIC_ESR, 0);
-	}
-#endif
-
-	preempt_disable();
-
-	/*
-	 * Double-check whether this APIC is really registered.
-	 * This is meaningless in clustered apic mode, so we skip it.
-	 */
-	if (!apic_id_registered())
-		BUG();
-
-	/*
-	 * Intel recommends to set DFR, LDR and TPR before enabling
-	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-	 * document number 292116).  So here it goes...
-	 */
-	init_apic_ldr();
-
-	/*
-	 * Set Task Priority to 'accept all'. We never change this
-	 * later on.
-	 */
-	value = apic_read(APIC_TASKPRI);
-	value &= ~APIC_TPRI_MASK;
-	apic_write(APIC_TASKPRI, value);
-
-	/*
-	 * After a crash, we no longer service the interrupts and a pending
-	 * interrupt from previous kernel might still have ISR bit set.
-	 *
-	 * Most probably by now CPU has serviced that pending interrupt and
-	 * it might not have done the ack_APIC_irq() because it thought,
-	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-	 * does not clear the ISR bit and cpu thinks it has already serivced
-	 * the interrupt. Hence a vector might get locked. It was noticed
-	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
-	 */
-	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-		value = apic_read(APIC_ISR + i*0x10);
-		for (j = 31; j >= 0; j--) {
-			if (value & (1<<j))
-				ack_APIC_irq();
-		}
-	}
-
-	/*
-	 * Now that we are all set up, enable the APIC
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	/*
-	 * Enable APIC
-	 */
-	value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
-	 * certain networking cards. If high frequency interrupts are
-	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
-	 * entry is masked/unmasked at a high rate as well then sooner or
-	 * later IOAPIC line gets 'stuck', no more interrupts are received
-	 * from the device. If focus CPU is disabled then the hang goes
-	 * away, oh well :-(
-	 *
-	 * [ This bug can be reproduced easily with a level-triggered
-	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
-	 *   BX chipset. ]
-	 */
-	/*
-	 * Actually disabling the focus CPU check just makes the hang less
-	 * frequent as it makes the interrupt distributon model be more
-	 * like LRU than MRU (the short-term load is more even across CPUs).
-	 * See also the comment in end_level_ioapic_irq().  --macro
-	 */
-
-	/*
-	 * - enable focus processor (bit==0)
-	 * - 64bit mode always use processor focus
-	 *   so no need to set it
-	 */
-	value &= ~APIC_SPIV_FOCUS_DISABLED;
-#endif
-
-	/*
-	 * Set spurious IRQ vector
-	 */
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
-
-	/*
-	 * Set up LVT0, LVT1:
-	 *
-	 * set up through-local-APIC on the BP's LINT0. This is not
-	 * strictly necessary in pure symmetric-IO mode, but sometimes
-	 * we delegate interrupts to the 8259A.
-	 */
-	/*
-	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-	 */
-	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && (pic_mode || !value)) {
-		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-				smp_processor_id());
-	} else {
-		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-				smp_processor_id());
-	}
-	apic_write(APIC_LVT0, value);
-
-	/*
-	 * only the BP should see the LINT1 NMI signal, obviously.
-	 */
-	if (!smp_processor_id())
-		value = APIC_DM_NMI;
-	else
-		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write(APIC_LVT1, value);
-
-	preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-	lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-	{
-		unsigned int value;
-		/* Disable the local apic timer */
-		value = apic_read(APIC_LVTT);
-		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, value);
-	}
-#endif
-
-	setup_apic_nmi_watchdog(NULL);
-	apic_pm_activate();
-}
-
-#ifdef HAVE_X2APIC
-void check_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-	if (msr & X2APIC_ENABLE) {
-		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
-		apic_ops = &x2apic_ops;
-	}
-}
-
-void enable_x2apic(void)
-{
-	int msr, msr2;
-
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
-	if (!(msr & X2APIC_ENABLE)) {
-		printk("Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-	}
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	int ret;
-	unsigned long flags;
-
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of nox2apic\n");
-		return;
-	}
-
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		printk(KERN_INFO
-		       "Skipped enabling x2apic and Interrupt-remapping "
-		       "because of skipping io-apic setup\n");
-		return;
-	}
-
-	ret = dmar_table_init();
-	if (ret) {
-		printk(KERN_INFO
-		       "dmar_table_init() failed with %d:\n", ret);
-
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			printk(KERN_INFO
-			       "Not enabling x2apic,Intr-remapping\n");
-		return;
-	}
-
-	local_irq_save(flags);
-	mask_8259A();
-	save_mask_IO_APIC_setup();
-
-	ret = enable_intr_remapping(1);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
-
-	if (ret)
-		goto end;
-
-	if (!x2apic) {
-		x2apic = 1;
-		apic_ops = &x2apic_ops;
-		enable_x2apic();
-	}
-end:
-	if (ret)
-		/*
-		 * IR enabling failed
-		 */
-		restore_IO_APIC_setup();
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-	unmask_8259A();
-	local_irq_restore(flags);
-
-	if (!ret) {
-		if (!x2apic_preenabled)
-			printk(KERN_INFO
-			       "Enabled x2apic and interrupt-remapping\n");
-		else
-			printk(KERN_INFO
-			       "Enabled Interrupt-remapping\n");
-	} else
-		printk(KERN_ERR
-		       "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-	if (!cpu_has_x2apic)
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-	       " and x2apic\n");
-#endif
-
-	return;
-}
-#endif /* HAVE_X2APIC */
-
-#ifdef CONFIG_X86_64
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-	if (!cpu_has_apic) {
-		printk(KERN_INFO "No local APIC present\n");
-		return -1;
-	}
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
-	return 0;
-}
-#else
-/*
- * Detect and initialize APIC
- */
-static int __init detect_init_APIC(void)
-{
-	u32 h, l, features;
-
-	/* Disabled by kernel option? */
-	if (disable_apic)
-		return -1;
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-		    (boot_cpu_data.x86 == 15))
-			break;
-		goto no_apic;
-	case X86_VENDOR_INTEL:
-		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
-			break;
-		goto no_apic;
-	default:
-		goto no_apic;
-	}
-
-	if (!cpu_has_apic) {
-		/*
-		 * Over-ride BIOS and try to enable the local APIC only if
-		 * "lapic" specified.
-		 */
-		if (!force_enable_local_apic) {
-			printk(KERN_INFO "Local APIC disabled by BIOS -- "
-			       "you can enable it with \"lapic\"\n");
-			return -1;
-		}
-		/*
-		 * Some BIOSes disable the local APIC in the APIC_BASE
-		 * MSR. This can only be done in software for Intel P6 or later
-		 * and AMD K7 (Model > 1) or later.
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			printk(KERN_INFO
-			       "Local APIC disabled by BIOS -- reenabling.\n");
-			l &= ~MSR_IA32_APICBASE_BASE;
-			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-			wrmsr(MSR_IA32_APICBASE, l, h);
-			enabled_via_apicbase = 1;
-		}
-	}
-	/*
-	 * The APIC feature bit should now be enabled
-	 * in `cpuid'
-	 */
-	features = cpuid_edx(1);
-	if (!(features & (1 << X86_FEATURE_APIC))) {
-		printk(KERN_WARNING "Could not enable APIC!\n");
-		return -1;
-	}
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-	printk(KERN_INFO "Found and enabled local APIC!\n");
-
-	apic_pm_activate();
-
-	return 0;
-
-no_apic:
-	printk(KERN_INFO "No local APIC present or hardware disabled\n");
-	return -1;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-	unsigned long phys_addr;
-
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	phys_addr = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, phys_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-#ifdef HAVE_X2APIC
-	if (x2apic) {
-		boot_cpu_physical_apicid = read_apic_id();
-		return;
-	}
-#endif
-
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
-	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
-		apic_phys = mp_lapic_addr;
-
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-				APIC_BASE, apic_phys);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-#ifdef CONFIG_X86_64
-	if (disable_apic) {
-		printk(KERN_INFO "Apic disabled\n");
-		return -1;
-	}
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		printk(KERN_INFO "Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !cpu_has_apic)
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-		       boot_cpu_physical_apicid);
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-		return -1;
-	}
-#endif
-
-#ifdef HAVE_X2APIC
-	enable_IR_x2apic();
-#endif
-#ifdef CONFIG_X86_64
-	setup_apic_routing();
-#endif
-
-	verify_local_APIC();
-	connect_bsp_APIC();
-
-#ifdef CONFIG_X86_64
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
-	/*
-	 * Hack: In case of kdump, after a crash, kernel might be booting
-	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
-	 * might be zero if read from MP tables. Get it from LAPIC.
-	 */
-# ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	setup_local_APIC();
-
-#ifdef CONFIG_X86_64
-	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling vector on BP
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
-	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-#endif
-		localise_nmi_watchdog();
-	end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-# ifdef CONFIG_X86_64
-	else
-		nr_ioapics = 0;
-# endif
-#endif
-
-#ifdef CONFIG_X86_64
-	setup_boot_APIC_clock();
-	check_nmi_watchdog();
-#else
-	setup_boot_clock();
-#endif
-
-	return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_spurious_interrupt(void)
-#else
-void smp_spurious_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/*
-	 * Check if this really is a spurious interrupt and ACK it
-	 * if it is a vectored one.  Just in case...
-	 * Spurious interrupts should not be ACKed.
-	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
-
-#ifdef CONFIG_X86_64
-	add_pda(irq_spurious_count, 1);
-#else
-	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
-	       "should never happen.\n", smp_processor_id());
-	__get_cpu_var(irq_stat).irq_spurious_count++;
-#endif
-	irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_error_interrupt(void)
-#else
-void smp_error_interrupt(struct pt_regs *regs)
-#endif
-{
-	u32 v, v1;
-
-#ifdef CONFIG_X86_64
-	exit_idle();
-#endif
-	irq_enter();
-	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
-	apic_write(APIC_ESR, 0);
-	v1 = apic_read(APIC_ESR);
-	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
-
-	/* Here is what the APIC error bits mean:
-	   0: Send CS error
-	   1: Receive CS error
-	   2: Send accept error
-	   3: Receive accept error
-	   4: Reserved
-	   5: Send illegal vector
-	   6: Received illegal vector
-	   7: Illegal register address
-	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
-	irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Do not trust the local APIC being empty at bootup.
-		 */
-		clear_local_APIC();
-		/*
-		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-		 * local APIC to INT and NMI lines.
-		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
-	}
-#endif
-	enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:	indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	unsigned int value;
-
-#ifdef CONFIG_X86_32
-	if (pic_mode) {
-		/*
-		 * Put the board back into PIC mode (has an effect only on
-		 * certain older boards).  Note that APIC interrupts, including
-		 * IPIs, won't work beyond this point!  The only exception are
-		 * INIT IPIs.
-		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
-		return;
-	}
-#endif
-
-	/* Go back to Virtual Wire compatibility mode */
-
-	/* For the spurious interrupt use vector F, and enable it */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
-	value |= 0xf;
-	apic_write(APIC_SPIV, value);
-
-	if (!virt_wire_setup) {
-		/*
-		 * For LVT0 make it edge triggered, active high,
-		 * external and enabled
-		 */
-		value = apic_read(APIC_LVT0);
-		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
-	} else {
-		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	}
-
-	/*
-	 * For LVT1 make it edge triggered, active high,
-	 * nmi and enabled
-	 */
-	value = apic_read(APIC_LVT1);
-	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-	int cpu;
-	cpumask_t tmp_map;
-
-	/*
-	 * Validate version
-	 */
-	if (version == 0x0) {
-		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-				"fixing up to 0x10. (tell your hw vendor)\n",
-				version);
-		version = 0x10;
-	}
-	apic_version[apicid] = version;
-
-	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
-		return;
-	}
-
-	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-
-	physid_set(apicid, phys_cpu_present_map);
-	if (apicid == boot_cpu_physical_apicid) {
-		/*
-		 * x86_bios_cpu_apicid is required to have processors listed
-		 * in same order as logical cpu numbers. Hence the first
-		 * entry is BSP, and so on.
-		 */
-		cpu = 0;
-	}
-	if (apicid > max_physical_apicid)
-		max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-	 * but we need to work other dependencies like SMP_SUSPEND etc
-	 * before this can be done without some confusion.
-	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-	 *       - Ashok Raj <ashok.raj@intel.com>
-	 */
-	if (max_physical_apicid >= 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-	/* are we being called early in kernel startup? */
-	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-		cpu_to_apicid[cpu] = apicid;
-		bios_cpu_apicid[cpu] = apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-	}
-#endif
-
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
-}
-
-#ifdef CONFIG_X86_64
-int hard_smp_processor_id(void)
-{
-	return read_apic_id();
-}
-#endif
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-	/*
-	 * 'active' is true if the local APIC was enabled by us and
-	 * not the BIOS; this signifies that we are also responsible
-	 * for disabling it before entering apm/acpi suspend
-	 */
-	int active;
-	/* r/w apic fields */
-	unsigned int apic_id;
-	unsigned int apic_taskpri;
-	unsigned int apic_ldr;
-	unsigned int apic_dfr;
-	unsigned int apic_spiv;
-	unsigned int apic_lvtt;
-	unsigned int apic_lvtpc;
-	unsigned int apic_lvt0;
-	unsigned int apic_lvt1;
-	unsigned int apic_lvterr;
-	unsigned int apic_tmict;
-	unsigned int apic_tdcr;
-	unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	apic_pm_state.apic_id = apic_read(APIC_ID);
-	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	if (maxlvt >= 4)
-		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-	local_irq_save(flags);
-	disable_local_APIC();
-	local_irq_restore(flags);
-	return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-	unsigned int l, h;
-	unsigned long flags;
-	int maxlvt;
-
-	if (!apic_pm_state.active)
-		return 0;
-
-	maxlvt = lapic_get_maxlvt();
-
-	local_irq_save(flags);
-
-#ifdef HAVE_X2APIC
-	if (x2apic)
-		enable_x2apic();
-	else
-#endif
-	{
-		/*
-		 * Make sure the APICBASE points to the right address
-		 *
-		 * FIXME! This will be wrong if we ever support suspend on
-		 * SMP! We'll need to do this as part of the CPU restore!
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		l &= ~MSR_IA32_APICBASE_BASE;
-		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-		wrmsr(MSR_IA32_APICBASE, l, h);
-	}
-
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-	apic_write(APIC_ESR, 0);
-	apic_read(APIC_ESR);
-
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
-	.resume		= lapic_resume,
-	.suspend	= lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-	apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
-	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else	/* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif	/* CONFIG_PM */
-
-#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-	int i, clusters, zeros;
-	unsigned id;
-	u16 *bios_cpu_apicid;
-	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
-	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-	for (i = 0; i < NR_CPUS; i++) {
-		/* are we being called early in kernel startup? */
-		if (bios_cpu_apicid) {
-			id = bios_cpu_apicid[i];
-		}
-		else if (i < nr_cpu_ids) {
-			if (cpu_present(i))
-				id = per_cpu(x86_bios_cpu_apicid, i);
-			else
-				continue;
-		}
-		else
-			break;
-
-		if (id != BAD_APICID)
-			__set_bit(APIC_CLUSTERID(id), clustermap);
-	}
-
-	/* Problem:  Partially populated chassis may not have CPUs in some of
-	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-	 * Since clusters are allocated sequentially, count zeros only if
-	 * they are bounded by ones.
-	 */
-	clusters = 0;
-	zeros = 0;
-	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-		if (test_bit(i, clustermap)) {
-			clusters += 1 + zeros;
-			zeros = 0;
-		} else
-			++zeros;
-	}
-
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
-		return 1;
-
-	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
-	 */
-	return (clusters > 2);
-}
-#endif
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-	disable_apic = 1;
-	setup_clear_cpu_cap(X86_FEATURE_APIC);
-	return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-	return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-	local_apic_timer_c2_ok = 1;
-	return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-	disable_apic_timer = 1;
-	return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-	if (!arg)  {
-#ifdef CONFIG_X86_64
-		skip_ioapic_setup = 0;
-		return 0;
-#endif
-		return -EINVAL;
-	}
-
-	if (strcmp("debug", arg) == 0)
-		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", arg) == 0)
-		apic_verbosity = APIC_VERBOSE;
-	else {
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-			" use apic=verbose or apic=debug\n", arg);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-	if (!apic_phys)
-		return -1;
-
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
-	return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
-- 
cgit v1.2.3-55-g7522


From e492c5ae85428d4a3815d06bf308c590120b928b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 24 Aug 2008 22:41:26 -0700
Subject: x86: let 64 bit to use 32 bit calibrate_apic_clock

Use the 32-bit APIC calibration code - it's more mature.

Signed-of-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 88 ++------------------------------------------------
 1 file changed, 2 insertions(+), 86 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6e99af5ce678..ca5ef71f4208 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -478,90 +478,6 @@ static void __cpuinit setup_APIC_timer(void)
 	clockevents_register_device(levt);
 }
 
-#ifdef CONFIG_X86_64
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-	unsigned apic, apic_start;
-	unsigned long tsc, tsc_start;
-	int result;
-
-	local_irq_disable();
-
-	/*
-	 * Put whatever arbitrary (but long enough) timeout
-	 * value into the APIC clock, we just want to get the
-	 * counter running for calibration.
-	 *
-	 * No interrupt enable !
-	 */
-	__setup_APIC_LVTT(250000000, 0, 0);
-
-	apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-	if (apic_calibrate_pmtmr && pmtmr_ioport) {
-		pmtimer_wait(5000);  /* 5ms wait */
-		apic = apic_read(APIC_TMCCT);
-		result = (apic_start - apic) * 1000L / 5;
-	} else
-#endif
-	{
-		rdtscll(tsc_start);
-
-		do {
-			apic = apic_read(APIC_TMCCT);
-			rdtscll(tsc);
-		} while ((tsc - tsc_start) < TICK_COUNT &&
-				(apic_start - apic) < TICK_COUNT);
-
-		result = (apic_start - apic) * 1000L * tsc_khz /
-					(tsc - tsc_start);
-	}
-
-	local_irq_enable();
-
-	printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-	printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-		result / 1000 / 1000, result / 1000 % 1000);
-
-	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-				       lapic_clockevent.shift);
-	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-	lapic_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &lapic_clockevent);
-
-	calibration_result = (result * APIC_DIVISOR) / HZ;
-
-	/*
-	 * Do a sanity check on the APIC calibration result
-	 */
-	if (calibration_result < (1000000 / HZ)) {
-		printk(KERN_WARNING
-			"APIC frequency too slow, disabling apic timer\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#else
 /*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
@@ -659,6 +575,7 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
+#ifdef CONFIG_X86_PM_TIMER
 	/* Check, if the PM timer is available */
 	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
 	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
@@ -687,6 +604,7 @@ static int __init calibrate_APIC_clock(void)
 		}
 		pm_referenced = 1;
 	}
+#endif
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -773,8 +691,6 @@ static int __init calibrate_APIC_clock(void)
 	return 0;
 }
 
-#endif
-
 /*
  * Setup the boot APIC
  *
-- 
cgit v1.2.3-55-g7522


From 8c464a4b23ca283b414022ebc77787f3c7040fa7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Mon, 25 Aug 2008 12:41:19 -0700
Subject: sparseirq: move kstat_irqs from kstat to irq_desc - fix

fix non-sparseirq architectures.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h         |  4 ++--
 include/linux/kernel_stat.h | 10 ++++++++--
 kernel/irq/chip.c           | 21 ++++++++++++++++++++-
 kernel/irq/handle.c         | 10 ++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2445d2b3d5dc..93fe9a943e71 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -161,8 +161,6 @@ struct irq_desc {
 #endif
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned int            *kstat_irqs;
-#else
-	unsigned int            kstat_irqs[NR_CPUS];
 #endif
 #if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
        struct irq_2_iommu      *irq_2_iommu;
@@ -219,8 +217,10 @@ extern struct irq_desc *sparse_irqs;
 
 #endif
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
+#endif
 
 /*
  * Migration helpers for obsolete names, they will go away:
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index f10616712de5..21249d8c1293 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifndef CONFIG_GENERIC_HARDIRQS
+#ifndef CONFIG_HAVE_DYN_ARRAY
        unsigned int irqs[NR_IRQS];
 #endif
 };
@@ -41,7 +41,13 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_GENERIC_HARDIRQS
+#ifndef CONFIG_HAVE_DYN_ARRAY
+#define kstat_irqs_this_cpu(irq) \
+	(kstat_this_cpu.irqs[irq])
+#endif
+
+
+#ifndef CONFIG_HAVE_DYN_ARRAY
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9fc5e69213de..4ef555c50db8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -327,7 +327,11 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -368,7 +372,11 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/*
 	 * If its disabled or no action available
@@ -415,7 +423,11 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/*
 	 * If its disabled or no action available
@@ -479,8 +491,11 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -535,7 +550,11 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d638a911cbc1..eae69373a9c6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -34,7 +34,11 @@ void
 handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 	ack_bad_irq(irq);
 }
 
@@ -401,7 +405,11 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 	kstat_irqs_this_cpu(desc)++;
+#else
+	kstat_irqs_this_cpu(irq)++;
+#endif
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -501,10 +509,12 @@ void early_init_irq_lock_class(void)
 }
 #endif
 
+#ifdef CONFIG_HAVE_DYN_ARRAY
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	return desc->kstat_irqs[cpu];
 }
+#endif
 EXPORT_SYMBOL(kstat_irqs_cpu);
 
-- 
cgit v1.2.3-55-g7522


From 1dd6ba2e179773597e20f17f66049a64e6c4b2ec Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Mon, 25 Aug 2008 21:27:26 +0400
Subject: x86: apic - unify smp_spurious/error_interrupt declaration

According to entry_64.S we do pass pt_regs pointer
into interrupt handlers but don't use them. So we
safely may merge the declarations.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c   | 8 --------
 include/asm-x86/hw_irq.h | 5 -----
 2 files changed, 13 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index ca5ef71f4208..0556f375e40b 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1659,11 +1659,7 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_spurious_interrupt(void)
-#else
 void smp_spurious_interrupt(struct pt_regs *regs)
-#endif
 {
 	u32 v;
 
@@ -1694,11 +1690,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-#ifdef CONFIG_X86_64
-asmlinkage void smp_error_interrupt(void)
-#else
 void smp_error_interrupt(struct pt_regs *regs)
-#endif
 {
 	u32 v, v1;
 
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 39c7a4745d25..749d042f0556 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -96,13 +96,8 @@ extern asmlinkage void qic_call_function_interrupt(void);
 
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_spurious_interrupt(void);
-extern asmlinkage void smp_error_interrupt(void);
-#endif
 #ifdef CONFIG_X86_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
-- 
cgit v1.2.3-55-g7522


From c59d85a7b7822b83fc9783314543eea0ca860480 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 28 Aug 2008 08:56:33 +0200
Subject: sparseirq: export nr_irqs on m68k/sparc/s390

Stephen Rothwell reported such build failures on m68k/sparc/s390:

> ERROR: "nr_irqs" [drivers/net/hamradio/baycom_ser_fdx.ko] undefined!
> ERROR: "nr_irqs" [drivers/net/3c59x.ko] undefined!

export nr_irqs on these architectures too.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m68k/kernel/ints.c | 1 +
 arch/s390/kernel/irq.c  | 1 +
 arch/sparc/kernel/irq.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 74453d15692e..44169e4cd91d 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -47,6 +47,7 @@
 #endif
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 14eb5496c8a8..3624c4a0037a 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -18,6 +18,7 @@
 #include <linux/profile.h>
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 /*
  * show_interrupts is needed by /proc/interrupts.
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 059598b7e0f0..4b99e3ce3916 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -57,6 +57,7 @@
 #endif /* SMP */
 
 int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL(nr_irqs);
 
 unsigned long __raw_local_irq_save(void)
 {
-- 
cgit v1.2.3-55-g7522


From a11b5abef50722e42a7d13f6b799c4f606fcb797 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 3 Sep 2008 16:58:31 -0700
Subject: x2apic: fix reserved APIC register accesses in print_local_APIC()

APIC_ARBPRI is a reserved register for XAPIC and beyond.
APIC_RRR is a reserved register except for 82489DX, APIC for Pentium processors.
APIC_EOI is a write only register.
APIC_DFR is reserved in x2apic mode.

Access to these registers in x2apic will result in #GP fault. Fix these
apic register accesses.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 27 ++++++++++++++++++---------
 include/asm-x86/apic.h    | 14 ++++++++++++++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d28128e0392c..93ceb19d1e90 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1751,21 +1751,30 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
 	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		v = apic_read(APIC_ARBPRI);
-		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			v & APIC_ARBPRI_MASK);
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			       v & APIC_ARBPRI_MASK);
+		}
 		v = apic_read(APIC_PROCPRI);
 		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 	}
 
-	v = apic_read(APIC_EOI);
-	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-	v = apic_read(APIC_RRR);
-	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	}
+
 	v = apic_read(APIC_LDR);
 	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	v = apic_read(APIC_DFR);
-	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	}
 	v = apic_read(APIC_SPIV);
 	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 2d970f6bc2a1..ef1d72dbdfe0 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -98,6 +98,20 @@ extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+static inline int x2apic_enabled(void)
+{
+	int msr, msr2;
+
+	if (!cpu_has_x2apic)
+		return 0;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (msr & X2APIC_ENABLE)
+		return 1;
+	return 0;
+}
+#else
+#define x2apic_enabled()	0
 #endif
 
 struct apic_ops {
-- 
cgit v1.2.3-55-g7522


From f6dd5c3106fb283e37d915eeb33019ef40510f85 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 3 Sep 2008 16:58:32 -0700
Subject: dmar: fix using early fixmap mapping for DMAR table parsing

Very early detection of the DMAR tables will setup fixmap mapping. For
parsing these tables later (while enabling dma and/or interrupt remapping),
early fixmap mapping shouldn't be used. Fix it by calling table detection
routines again, which will call generic apci_get_table() for setting up
the correct mapping.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c   | 49 ++++++++++++++++++++++++++++---------------------
 include/linux/dmar.h |  1 -
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index bd2c01674f5e..f2c5eb6e78f7 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -289,6 +289,24 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	}
 }
 
+/**
+ * dmar_table_detect - checks to see if the platform supports DMAR devices
+ */
+static int __init dmar_table_detect(void)
+{
+	acpi_status status = AE_OK;
+
+	/* if we could find DMAR table, then there are DMAR devices */
+	status = acpi_get_table(ACPI_SIG_DMAR, 0,
+				(struct acpi_table_header **)&dmar_tbl);
+
+	if (ACPI_SUCCESS(status) && !dmar_tbl) {
+		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+		status = AE_NOT_FOUND;
+	}
+
+	return (ACPI_SUCCESS(status) ? 1 : 0);
+}
 
 /**
  * parse_dmar_table - parses the DMA reporting table
@@ -300,6 +318,12 @@ parse_dmar_table(void)
 	struct acpi_dmar_header *entry_header;
 	int ret = 0;
 
+	/*
+	 * Do it again, earlier dmar_tbl mapping could be mapped with
+	 * fixed map.
+	 */
+	dmar_table_detect();
+
 	dmar = (struct acpi_table_dmar *)dmar_tbl;
 	if (!dmar)
 		return -ENODEV;
@@ -430,30 +454,11 @@ int __init dmar_table_init(void)
 	return 0;
 }
 
-/**
- * early_dmar_detect - checks to see if the platform supports DMAR devices
- */
-int __init early_dmar_detect(void)
-{
-	acpi_status status = AE_OK;
-
-	/* if we could find DMAR table, then there are DMAR devices */
-	status = acpi_get_table(ACPI_SIG_DMAR, 0,
-				(struct acpi_table_header **)&dmar_tbl);
-
-	if (ACPI_SUCCESS(status) && !dmar_tbl) {
-		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
-		status = AE_NOT_FOUND;
-	}
-
-	return (ACPI_SUCCESS(status) ? 1 : 0);
-}
-
 void __init detect_intel_iommu(void)
 {
 	int ret;
 
-	ret = early_dmar_detect();
+	ret = dmar_table_detect();
 
 #ifdef CONFIG_DMAR
 	{
@@ -479,14 +484,16 @@ void __init detect_intel_iommu(void)
 			       " x2apic support\n");
 
 			dmar_disabled = 1;
-			return;
+			goto end;
 		}
 
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 		    !dmar_disabled)
 			iommu_detected = 1;
 	}
+end:
 #endif
+	dmar_tbl = NULL;
 }
 
 
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index c360c558e59e..f1984fc3e06d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units;
 	list_for_each_entry(drhd, &dmar_drhd_units, list)
 
 extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
 extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
-- 
cgit v1.2.3-55-g7522


From 74d04bd7dcb4c6130fd8a314d28bfecc9ae7c360 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 3 Sep 2008 16:58:33 -0700
Subject: dmar: initialize the return value in dmar_parse_dev()

initialize the return value in dmar_parse_dev()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f2c5eb6e78f7..d281a03695f7 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -193,7 +193,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	static int include_all;
-	int ret;
+	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
-- 
cgit v1.2.3-55-g7522


From 04e2ea67069e285404192a35c24dfe7c53b9c61f Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Wed, 3 Sep 2008 16:58:34 -0700
Subject: dmar: use list_for_each_entry_safe() in dmar_dev_scope_init()

In dmar_dev_scope_init(), functions called under for_each_drhd_unit()/
for_each_rmrr_units() can delete the list entry under some error conditions.

So we should use list_for_each_entry_safe() for safe traversal.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index d281a03695f7..ceb338dfa3f2 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -397,10 +397,10 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
 
 int __init dmar_dev_scope_init(void)
 {
-	struct dmar_drhd_unit *drhd;
+	struct dmar_drhd_unit *drhd, *drhd_n;
 	int ret = -ENODEV;
 
-	for_each_drhd_unit(drhd) {
+	list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
 		ret = dmar_parse_dev(drhd);
 		if (ret)
 			return ret;
@@ -408,8 +408,8 @@ int __init dmar_dev_scope_init(void)
 
 #ifdef CONFIG_DMAR
 	{
-		struct dmar_rmrr_unit *rmrr;
-		for_each_rmrr_units(rmrr) {
+		struct dmar_rmrr_unit *rmrr, *rmrr_n;
+		list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 			ret = rmrr_parse_dev(rmrr);
 			if (ret)
 				return ret;
-- 
cgit v1.2.3-55-g7522


From 1c7d1bcad218808a4f67a4492a5e1d920e85c239 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Wed, 3 Sep 2008 16:58:35 -0700
Subject: dmar: fix dmar_parse_dev() devices_cnt error condition check

It is possible that,
instead of PCI endpoint/sub-hierarchy structures, only IO-APIC/HPET
devices may be reported under device scope structures. Fix the devices_cnt
error check, which cares about only PCI structures and removes the
dma-remapping unit structure (dmaru) when the devices_cnt is zero
and include_all flag is not set.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index ceb338dfa3f2..9527405ae198 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -212,7 +212,7 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 		include_all = 1;
 	}
 
-	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
 	}
-- 
cgit v1.2.3-55-g7522


From e8fc96ed3603924e7aa09fd5e4dbd289b7e69907 Mon Sep 17 00:00:00 2001
From: H. Peter Anvin
Date: Thu, 4 Sep 2008 09:56:10 -0700
Subject: dyn_array: don't break compiling for !CONFIG_SMP

Impact: build failure on uniprocessor

When compiling for !CONFIG_SMP, per_cpu_alloc_dyn_array() would fail
to compile, since it uses per_cpu_offset, which is not defined for
uniprocessor builds.

Hence, do not compile per_cpu_alloc_dyn_array() for !CONFIG_SMP.
Attempting to call this function in a uniprocessor configuration would
be simply wrong in the first place.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 init/dyn_array.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index c4cd902a1180..1dc08140f3cb 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -91,6 +91,7 @@ unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
 	return total_size;
 }
 
+#ifdef CONFIG_SMP
 void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 {
 #ifdef CONFIG_HAVE_DYN_ARRAY
@@ -122,3 +123,4 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 	}
 #endif
 }
+#endif
-- 
cgit v1.2.3-55-g7522


From 02c1df199c990cd21c09a4dffaa06d4e0b7bf2bf Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Thu, 4 Sep 2008 20:57:11 +0200
Subject: x86: print out if acpi want physical flat of all

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genapic_flat_64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	 * is an example).
 	 */
 	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "system APIC only can use physical flat");
 		return 1;
+	}
 #endif
 
 	return 0;
-- 
cgit v1.2.3-55-g7522


From b558cb35f1ef92837cba0fba9aad267e5eff1f65 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Thu, 4 Sep 2008 20:57:13 +0200
Subject: dyn_array: fix typo

Andrew found the typo could break some platforms.

also fix a checkpatch warning.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index 1dc08140f3cb..cf1e04c83b33 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -54,7 +54,7 @@ void __init pre_alloc_dyn_array(void)
 			da->init_work(da);
 	}
 #else
-#ifdef CONFIF_GENERIC_HARDIRQS
+#ifdef CONFIG_GENERIC_HARDIRQS
 	unsigned int i;
 
 	for (i = 0; i < NR_IRQS; i++)
@@ -117,9 +117,8 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 
 		phys += size;
 
-		if (da->init_work) {
+		if (da->init_work)
 			da->init_work(da);
-		}
 	}
 #endif
 }
-- 
cgit v1.2.3-55-g7522


From 676f4a920be27160747439fe71026aa15ec78e5a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Thu, 4 Sep 2008 22:37:49 +0400
Subject: x86: io-apic - use ARRAY_SIZE macro

Make the code width a bit shorter with ARRAY_SIZE macro.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 93ceb19d1e90..9b01fdadcb9b 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -166,7 +166,7 @@ static void __init init_work(void *data)
 
 	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-	legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
 
-- 
cgit v1.2.3-55-g7522


From ac54a6c9371bacb86bee1db23f7d82e8685c7e17 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Thu, 4 Sep 2008 22:37:50 +0400
Subject: x86: io-apic - declare irq_cfg_lock for SPARSE_IRQ only

We use irq_cfg_lock lock in SPARSE_IRQ only context so
move it under #ifdef and compiler will be happy.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b01fdadcb9b..d22fecf828b8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -147,14 +147,15 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 
 static struct irq_cfg *irq_cfgx;
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 /*
  * Protect the irq_cfgx_free freelist:
  */
 static DEFINE_SPINLOCK(irq_cfg_lock);
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_cfg *irq_cfgx_free;
 #endif
+
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
-- 
cgit v1.2.3-55-g7522


From e65ef88c20d5c68bde18f559e0d0ad7d718beb28 Mon Sep 17 00:00:00 2001
From: Dean Nelson
Date: Fri, 5 Sep 2008 09:07:20 -0500
Subject: irq: error missed ifndef CONFIG_HAVE_SPARSE_IRQ

An error return from create_irq_nr() is 0, but an error return from
create_irq() is -1.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/htirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 7c5aef13fcdb..7b180e0c634e 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -144,7 +144,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 #else
 	irq = create_irq();
 #endif
-	if (irq == 0) {
+	if (irq <= 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
-- 
cgit v1.2.3-55-g7522


From 21056830c4e5c6735f1d49453398d7186ca4e8d7 Mon Sep 17 00:00:00 2001
From: Dean Nelson
Date: Fri, 5 Sep 2008 09:10:40 -0500
Subject: irq: set_irq_chip() has redundant call to irq_to_desc()

Extraneous call to irq_to_desc().

Signed-off-by: Dean Nelson <dcn@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/chip.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4ef555c50db8..570d1ea1db5d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -102,7 +102,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
-- 
cgit v1.2.3-55-g7522


From 1f3addcf2d54394d10713be947eeaf18d2b998a9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 5 Sep 2008 10:03:37 -0700
Subject: irq: error missed ifndef CONFIG_HAVE_SPARSE_IRQ, v2

need to change irq to int too

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/htirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 7b180e0c634e..9e4929a00832 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -109,7 +109,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	u32 data;
 	int max_irq;
 	int pos;
-	unsigned int irq;
+	int irq;
 	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
-- 
cgit v1.2.3-55-g7522


From 932775a4ab622e3c99bd59f14cc7d96722f79501 Mon Sep 17 00:00:00 2001
From: venkatesh.pallipadi@intel.com
Date: Fri, 5 Sep 2008 18:02:15 -0700
Subject: x86: HPET_MSI change IRQ affinity in process context when it is
 disabled

Change the IRQ affinity in the process context when the IRQ is disabled.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/manage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ddc956861a58..ad2ce72c83c4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -87,10 +87,11 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 		return -EINVAL;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT) {
+	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&desc->lock, flags);
+		desc->affinity = cpumask;
 		desc->chip->set_affinity(irq, cpumask);
 		spin_unlock_irqrestore(&desc->lock, flags);
 	} else
-- 
cgit v1.2.3-55-g7522


From b40d575bf0679c45aaf9e1161fc51a6b041b7210 Mon Sep 17 00:00:00 2001
From: venkatesh.pallipadi@intel.com
Date: Fri, 5 Sep 2008 18:02:16 -0700
Subject: x86: HPET_MSI Refactor code in preparation for HPET_MSI

Preparatory patch before the actual HPET MSI changes. Sets up hpet_set_mode
and hpet_next_event for the MSI related changes. Just the code
refactoring and should be zero functional change.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..f7cb5e9e261e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -227,44 +227,44 @@ static void hpet_legacy_clockevent_register(void)
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
-			  struct clock_event_device *evt)
+static void hpet_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt, int timer)
 {
 	unsigned long cfg, cmp, now;
 	uint64_t delta;
 
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
-		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
-		delta >>= hpet_clockevent.shift;
+		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+		delta >>= evt->shift;
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		/*
 		 * The first write after writing TN_SETVAL to the
 		 * config register sets the counter value, the second
 		 * write sets the period.
 		 */
-		hpet_writel(cmp, HPET_T0_CMP);
+		hpet_writel(cmp, HPET_Tn_CMP(timer));
 		udelay(1);
-		hpet_writel((unsigned long) delta, HPET_T0_CMP);
+		hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_PERIODIC;
 		cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		cfg = hpet_readl(HPET_T0_CFG);
+		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T0_CFG);
+		hpet_writel(cfg, HPET_Tn_CFG(timer));
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -273,14 +273,14 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
 	}
 }
 
-static int hpet_legacy_next_event(unsigned long delta,
-				  struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+			   struct clock_event_device *evt, int timer)
 {
 	u32 cnt;
 
 	cnt = hpet_readl(HPET_COUNTER);
 	cnt += (u32) delta;
-	hpet_writel(cnt, HPET_T0_CMP);
+	hpet_writel(cnt, HPET_Tn_CMP(timer));
 
 	/*
 	 * We need to read back the CMP register to make sure that
@@ -292,6 +292,18 @@ static int hpet_legacy_next_event(unsigned long delta,
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
 
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *evt)
+{
+	hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+			struct clock_event_device *evt)
+{
+	return hpet_next_event(delta, evt, 0);
+}
+
 /*
  * Clock source related code
  */
-- 
cgit v1.2.3-55-g7522


From 58ac1e76ce77d515bd5cb65dbc465a040da341c6 Mon Sep 17 00:00:00 2001
From: venkatesh.pallipadi@intel.com
Date: Fri, 5 Sep 2008 18:02:17 -0700
Subject: x86: HPET_MSI Basic HPET_MSI setup code

Basic HPET MSI setup code. Routines to perform basic MSI read write
in HPET memory map and setting up irq_chip for HPET MSI.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c    | 54 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/io_apic.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/hpet.h    | 21 ++++++++++++++++
 3 files changed, 139 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f7cb5e9e261e..3f10d16a8348 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -6,6 +6,8 @@
 #include <linux/init.h>
 #include <linux/sysdev.h>
 #include <linux/pm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
@@ -25,6 +27,15 @@
 unsigned long hpet_address;
 static void __iomem *hpet_virt_address;
 
+struct hpet_dev {
+	struct clock_event_device evt;
+	unsigned int num;
+	int cpu;
+	unsigned int irq;
+	unsigned int flags;
+	char name[10];
+};
+
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -304,6 +315,49 @@ static int hpet_legacy_next_event(unsigned long delta,
 	return hpet_next_event(delta, evt, 0);
 }
 
+/*
+ * HPET MSI Support
+ */
+
+void hpet_msi_unmask(unsigned int irq)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+	unsigned long cfg;
+
+	/* unmask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg |= HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+	unsigned long cfg;
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	/* mask it */
+	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+	cfg &= ~HPET_TN_FSB;
+	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+	hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+	struct hpet_dev *hdev = get_irq_data(irq);
+
+	msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+	msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+	msg->address_hi = 0;
+}
+
 /*
  * Clock source related code
  */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d22fecf828b8..77fa155becf6 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -41,6 +41,7 @@
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -56,6 +57,7 @@
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -3522,6 +3524,68 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cfg = irq_cfg(irq);
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	hpet_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(irq, &msg);
+	desc = irq_to_desc(irq);
+	desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.unmask = hpet_msi_unmask,
+	.mask = hpet_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = hpet_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index cbbbb6d4dd32..58b273f6ef07 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__HPET_H
 #define ASM_X86__HPET_H
 
+#include <linux/msi.h>
+
 #ifdef CONFIG_HPET_TIMER
 
 #define HPET_MMAP_SIZE		1024
@@ -10,6 +12,11 @@
 #define HPET_CFG		0x010
 #define HPET_STATUS		0x020
 #define HPET_COUNTER		0x0f0
+
+#define HPET_Tn_CFG(n)		(0x100 + 0x20 * n)
+#define HPET_Tn_CMP(n)		(0x108 + 0x20 * n)
+#define HPET_Tn_ROUTE(n)	(0x110 + 0x20 * n)
+
 #define HPET_T0_CFG		0x100
 #define HPET_T0_CMP		0x108
 #define HPET_T0_ROUTE		0x110
@@ -65,6 +72,20 @@ extern void hpet_disable(void);
 extern unsigned long hpet_readl(unsigned long a);
 extern void force_hpet_resume(void);
 
+extern void hpet_msi_unmask(unsigned int irq);
+extern void hpet_msi_mask(unsigned int irq);
+extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
+extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+
+#ifdef CONFIG_PCI_MSI
+extern int arch_setup_hpet_msi(unsigned int irq);
+#else
+static inline int arch_setup_hpet_msi(unsigned int irq)
+{
+	return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_HPET_EMULATE_RTC
 
 #include <linux/interrupt.h>
-- 
cgit v1.2.3-55-g7522


From 4588c1f0354ac96a358b3f9e8e4331c51cf3336f Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sat, 6 Sep 2008 14:19:17 +0200
Subject: x86: HPET_MSI Basic HPET_MSI setup code, cleanups

small style cleanups.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 50 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3f10d16a8348..03d3655734b4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,39 +1,39 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
-#include <linux/pm.h>
-#include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/io.h>
 
 #include <asm/fixmap.h>
-#include <asm/hpet.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
+#include <asm/hpet.h>
 
-#define HPET_MASK	CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT	22
+#define HPET_MASK			CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT			22
 
 /* FSEC = 10^-15
    NSEC = 10^-9 */
-#define FSEC_PER_NSEC	1000000L
+#define FSEC_PER_NSEC			1000000L
 
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long				hpet_address;
+static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
-	struct clock_event_device evt;
-	unsigned int num;
-	int cpu;
-	unsigned int irq;
-	unsigned int flags;
-	char name[10];
+	struct clock_event_device	evt;
+	unsigned int			num;
+	int				cpu;
+	unsigned int			irq;
+	unsigned int			flags;
+	char				name[10];
 };
 
 unsigned long hpet_readl(unsigned long a)
@@ -70,7 +70,7 @@ static inline void hpet_clear_mapping(void)
 static int boot_hpet_disable;
 int hpet_force_user;
 
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
 {
 	if (str) {
 		if (!strncmp("disable", str, 7))
@@ -91,7 +91,7 @@ __setup("nohpet", disable_hpet);
 
 static inline int is_hpet_capable(void)
 {
-	return (!boot_hpet_disable && hpet_address);
+	return !boot_hpet_disable && hpet_address;
 }
 
 /*
@@ -122,10 +122,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 
 	nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-	memset(&hd, 0, sizeof (hd));
-	hd.hd_phys_address = hpet_address;
-	hd.hd_address = hpet;
-	hd.hd_nirqs = nrtimers;
+	memset(&hd, 0, sizeof(hd));
+	hd.hd_phys_address	= hpet_address;
+	hd.hd_address		= hpet;
+	hd.hd_nirqs		= nrtimers;
 	hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -141,8 +141,8 @@ static void hpet_reserve_platform_timers(unsigned long id)
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
 	for (i = 2; i < nrtimers; timer++, i++) {
-		hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
-			Tn_INT_ROUTE_CNF_SHIFT;
+		hd.hd_irq[i] = (readl(&timer->hpet_config) &
+			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
 	hpet_alloc(&hd);
@@ -244,7 +244,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 	unsigned long cfg, cmp, now;
 	uint64_t delta;
 
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
 		delta >>= evt->shift;
-- 
cgit v1.2.3-55-g7522


From 26afe5f2fbf06ea0765aaa316640c4dd472310c0 Mon Sep 17 00:00:00 2001
From: venkatesh.pallipadi@intel.com
Date: Fri, 5 Sep 2008 18:02:18 -0700
Subject: x86: HPET_MSI Initialise per-cpu HPET timers

Initialize a per CPU HPET MSI timer when possible. We retain the HPET
timer 0 (IRQ 0) and timer 1 (IRQ 8) as is when legacy mode is being used. We
setup the remaining HPET timers as per CPU MSI based timers. This per CPU
timer will eliminate the need for timer broadcasting with IRQ 0 when there
is non-functional LAPIC timer across CPU deep C-states.

If there are more CPUs than number of available timers, CPUs that do not
find any timer to use will continue using LAPIC and IRQ 0 broadcast.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 293 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 03d3655734b4..31e9191b7e19 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -21,10 +21,19 @@
    NSEC = 10^-9 */
 #define FSEC_PER_NSEC			1000000L
 
+#define HPET_DEV_USED_BIT		2
+#define HPET_DEV_USED			(1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID			0x8
+#define HPET_DEV_FSB_CAP		0x1000
+#define HPET_DEV_PERI_CAP		0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
+
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
 unsigned long				hpet_address;
+unsigned long				hpet_num_timers;
 static void __iomem			*hpet_virt_address;
 
 struct hpet_dev {
@@ -36,6 +45,10 @@ struct hpet_dev {
 	char				name[10];
 };
 
+static struct hpet_dev			*hpet_devs;
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -145,6 +158,16 @@ static void hpet_reserve_platform_timers(unsigned long id)
 			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
+	for (i = 0; i < nrtimers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd.hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(&hd, hdev->num);
+	}
+
 	hpet_alloc(&hd);
 
 }
@@ -238,6 +261,8 @@ static void hpet_legacy_clockevent_register(void)
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
+static int hpet_setup_msi_irq(unsigned int irq);
+
 static void hpet_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt, int timer)
 {
@@ -279,7 +304,15 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
-		hpet_enable_legacy_int();
+		if (timer == 0) {
+			hpet_enable_legacy_int();
+		} else {
+			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+			hpet_setup_msi_irq(hdev->irq);
+			disable_irq(hdev->irq);
+			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			enable_irq(hdev->irq);
+		}
 		break;
 	}
 }
@@ -318,7 +351,7 @@ static int hpet_legacy_next_event(unsigned long delta,
 /*
  * HPET MSI Support
  */
-
+#ifdef CONFIG_PCI_MSI
 void hpet_msi_unmask(unsigned int irq)
 {
 	struct hpet_dev *hdev = get_irq_data(irq);
@@ -358,6 +391,253 @@ void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
 	msg->address_hi = 0;
 }
 
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+	return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	if (arch_setup_hpet_msi(irq)) {
+		destroy_irq(irq);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+	unsigned int irq;
+
+	irq = create_irq();
+	if (!irq)
+		return -EINVAL;
+
+	set_irq_data(irq, dev);
+
+	if (hpet_setup_msi_irq(irq))
+		return -EINVAL;
+
+	dev->irq = irq;
+	return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+	struct hpet_dev *dev = (struct hpet_dev *)data;
+	struct clock_event_device *hevt = &dev->evt;
+
+	if (!hevt->event_handler) {
+		printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+				dev->num);
+		return IRQ_HANDLED;
+	}
+
+	hevt->event_handler(hevt);
+	return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+	if (request_irq(dev->irq, hpet_interrupt_handler,
+			IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+		return -1;
+
+	disable_irq(dev->irq);
+	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	enable_irq(dev->irq);
+
+	return 0;
+}
+
+/* This should be called in specific @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+	struct clock_event_device *evt = &hdev->evt;
+	uint64_t hpet_freq;
+
+	WARN_ON(cpu != smp_processor_id());
+	if (!(hdev->flags & HPET_DEV_VALID))
+		return;
+
+	if (hpet_setup_msi_irq(hdev->irq))
+		return;
+
+	hdev->cpu = cpu;
+	per_cpu(cpu_hpet_dev, cpu) = hdev;
+	evt->name = hdev->name;
+	hpet_setup_irq(hdev);
+	evt->irq = hdev->irq;
+
+	evt->rating = 110;
+	evt->features = CLOCK_EVT_FEAT_ONESHOT;
+	if (hdev->flags & HPET_DEV_PERI_CAP)
+		evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+	evt->set_mode = hpet_msi_set_mode;
+	evt->set_next_event = hpet_msi_next_event;
+	evt->shift = 32;
+
+	/*
+	 * The period is a femto seconds value. We need to calculate the
+	 * scaled math multiplication factor for nanosecond to hpet tick
+	 * conversion.
+	 */
+	hpet_freq = 1000000000000000ULL;
+	do_div(hpet_freq, hpet_period);
+	evt->mult = div_sc((unsigned long) hpet_freq,
+				      NSEC_PER_SEC, evt->shift);
+	/* Calculate the max delta */
+	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+	/* 5 usec minimum reprogramming delta. */
+	evt->min_delta_ns = 5000;
+
+	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	unsigned int id;
+	unsigned int num_timers;
+	unsigned int num_timers_used = 0;
+	int i;
+
+	id = hpet_readl(HPET_ID);
+
+	num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+	num_timers++; /* Value read out starts from 0 */
+
+	hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+	if (!hpet_devs)
+		return;
+
+	hpet_num_timers = num_timers;
+
+	for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+		struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+		unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+		/* Only consider HPET timer with MSI support */
+		if (!(cfg & HPET_TN_FSB_CAP))
+			continue;
+
+		hdev->flags = 0;
+		if (cfg & HPET_TN_PERIODIC_CAP)
+			hdev->flags |= HPET_DEV_PERI_CAP;
+		hdev->num = i;
+
+		sprintf(hdev->name, "hpet%d", i);
+		if (hpet_assign_irq(hdev))
+			continue;
+
+		hdev->flags |= HPET_DEV_FSB_CAP;
+		hdev->flags |= HPET_DEV_VALID;
+		num_timers_used++;
+		if (num_timers_used == num_possible_cpus())
+			break;
+	}
+
+	printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+		num_timers, num_timers_used);
+}
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+	int i;
+
+	if (!hpet_devs)
+		return NULL;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+		if (test_and_set_bit(HPET_DEV_USED_BIT,
+			(unsigned long *)&hdev->flags))
+			continue;
+		return hdev;
+	}
+	return NULL;
+}
+
+struct hpet_work_struct {
+	struct delayed_work work;
+	struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+	struct hpet_dev *hdev;
+	int cpu = smp_processor_id();
+	struct hpet_work_struct *hpet_work;
+
+	hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+	hdev = hpet_get_unused_timer();
+	if (hdev)
+		init_one_hpet_msi_clockevent(hdev, cpu);
+
+	complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	unsigned long cpu = (unsigned long)hcpu;
+	struct hpet_work_struct work;
+	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+		INIT_DELAYED_WORK(&work.work, hpet_work);
+		init_completion(&work.complete);
+		/* FIXME: add schedule_work_on() */
+		schedule_delayed_work_on(cpu, &work.work, 0);
+		wait_for_completion(&work.complete);
+		break;
+	case CPU_DEAD:
+		if (hdev) {
+			free_irq(hdev->irq, hdev);
+			hdev->flags &= ~HPET_DEV_USED;
+			per_cpu(cpu_hpet_dev, cpu) = NULL;
+		}
+		break;
+	}
+	return NOTIFY_OK;
+}
+#else
+
+void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	return;
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	return NOTIFY_OK;
+}
+
+#endif
+
 /*
  * Clock source related code
  */
@@ -493,8 +773,10 @@ int __init hpet_enable(void)
 
 	if (id & HPET_ID_LEGSUP) {
 		hpet_legacy_clockevent_register();
+		hpet_msi_capability_lookup(2);
 		return 1;
 	}
+	hpet_msi_capability_lookup(0);
 	return 0;
 
 out_nohpet:
@@ -511,6 +793,8 @@ out_nohpet:
  */
 static __init int hpet_late_init(void)
 {
+	int cpu;
+
 	if (boot_hpet_disable)
 		return -ENODEV;
 
@@ -526,6 +810,13 @@ static __init int hpet_late_init(void)
 
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
+	for_each_online_cpu(cpu) {
+		hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+	}
+
+	/* This notifier should be called after workqueue is ready */
+	hotcpu_notifier(hpet_cpuhp_notify, -20);
+
 	return 0;
 }
 fs_initcall(hpet_late_init);
-- 
cgit v1.2.3-55-g7522


From 3c2cbd2490656fb4b6ede586c557a2b09811a432 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sat, 6 Sep 2008 14:15:33 +0400
Subject: x86: io-apic - code style cleaning for setup_IO_APIC_irqs

By changing printout form we are able to shrink (and clean up) code a bit.

Former printout example:

	init IO_APIC IRQs
	 IO-APIC (apicid-pin) 1-1, 1-2, 1-3 not connected.
	 IO-APIC (apicid-pin) 2-1, 2-2, 2-3 not connected.

New printout example:

	init IO_APIC IRQs
	 1-1 1-2 1-3 (apicid-pin) not connected
	 2-1 2-2 2-3 (apicid-pin) not connected

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 53 +++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 77fa155becf6..4040d575a21e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1518,41 +1518,44 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic, pin, idx, irq, first_notcon = 1;
+	int apic, pin, idx, irq;
+	int notcon = 0;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-		idx = find_irq_entry(apic,pin,mp_INT);
-		if (idx == -1) {
-			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-				first_notcon = 0;
-			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-			continue;
-		}
-		if (!first_notcon) {
-			apic_printk(APIC_VERBOSE, " not connected.\n");
-			first_notcon = 1;
-		}
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if (idx == -1) {
+				apic_printk(APIC_VERBOSE,
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic].mp_apicid, pin);
+				if (!notcon)
+					notcon = 1;
+				continue;
+			}
 
-		irq = pin_2_irq(idx, apic, pin);
+			irq = pin_2_irq(idx, apic, pin);
 #ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
+			if (multi_timer_check(apic, irq))
+				continue;
 #endif
-		add_pin_to_irq(irq, apic, pin);
+			add_pin_to_irq(irq, apic, pin);
 
-		setup_IO_APIC_irq(apic, pin, irq,
-				  irq_trigger(idx), irq_polarity(idx));
-	}
+			setup_IO_APIC_irq(apic, pin, irq,
+					irq_trigger(idx), irq_polarity(idx));
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				KERN_DEBUG " (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 	}
 
-	if (!first_notcon)
-		apic_printk(APIC_VERBOSE, " not connected.\n");
+	if (notcon)
+		apic_printk(APIC_VERBOSE,
+			KERN_DEBUG " (apicid-pin) not connected\n");
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From 9e6cad9b1230b5563c4b38335bf1ed0f73c5d74a Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sat, 6 Sep 2008 10:26:50 -0700
Subject: dyn_array: remove one panic

Andrew said, we don't need duplicated panic.
because __alloc_bootmem already have that.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index cf1e04c83b33..778d9d508158 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -33,11 +33,9 @@ void __init pre_alloc_dyn_array(void)
 
 	/* allocate them all together */
 	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem_nopanic(total_size, max_align, 0);
-	if (!ptr)
-		panic("Can not alloc dyn_alloc\n");
-
+	ptr = __alloc_bootmem(total_size, max_align, 0);
 	phys = virt_to_phys(ptr);
+
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
-- 
cgit v1.2.3-55-g7522


From 79c09698cac8df16c5539447d5e1047fde9742e5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 7 Sep 2008 17:58:57 -0700
Subject: x86: lapic address print out like io apic addr

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 0556f375e40b..f3f50858048e 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1548,7 +1548,7 @@ void __init init_apic_mappings(void)
 		apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
 				APIC_BASE, apic_phys);
 
 	/*
-- 
cgit v1.2.3-55-g7522


From 2a554fb132cf804477087057b9b0ff2162984507 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Mon, 8 Sep 2008 19:38:06 +0400
Subject: x86: io-apic - do not use KERN_DEBUG marker too much

Do not use KERN_DEBUG several times on the same line being printed.
Introduced by mine previous patch, sorry.

Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4040d575a21e..12e59df026db 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1528,11 +1528,16 @@ static void __init setup_IO_APIC_irqs(void)
 
 			idx = find_irq_entry(apic, pin, mp_INT);
 			if (idx == -1) {
-				apic_printk(APIC_VERBOSE,
-					KERN_DEBUG " %d-%d",
-					mp_ioapics[apic].mp_apicid, pin);
-				if (!notcon)
+				if (!notcon) {
 					notcon = 1;
+					apic_printk(APIC_VERBOSE,
+						KERN_DEBUG " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
+				} else
+					apic_printk(APIC_VERBOSE, " %d-%d",
+						mp_ioapics[apic].mp_apicid,
+						pin);
 				continue;
 			}
 
@@ -1548,14 +1553,14 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		if (notcon) {
 			apic_printk(APIC_VERBOSE,
-				KERN_DEBUG " (apicid-pin) not connected\n");
+				" (apicid-pin) not connected\n");
 			notcon = 0;
 		}
 	}
 
 	if (notcon)
 		apic_printk(APIC_VERBOSE,
-			KERN_DEBUG " (apicid-pin) not connected\n");
+			" (apicid-pin) not connected\n");
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From f0ed4e695faf6766927c8cfbda2bc7530c7210c2 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi
Date: Mon, 8 Sep 2008 10:18:40 -0700
Subject: x86: using HPET in MSI mode and setting up per CPU HPET timers, fix

On Sat, Sep 06, 2008 at 06:03:53AM -0700, Ingo Molnar wrote:
>
> it crashes two testsystems, the fault on a NULL pointer in hpet init,
> with:
>
> initcall print_all_ICs+0x0/0x520 returned 0 after 26 msecs
> calling  hpet_late_init+0x0/0x1c0
> BUG: unable to handle kernel NULL pointer dereference at 000000000000008c
> IP: [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
> PGD 0
> Oops: 0000 [1] SMP
> CPU 0
> Modules linked in:
> Pid: 1, comm: swapper Not tainted 2.6.27-rc5 #29725
> RIP: 0010:[<ffffffff80d228be>]  [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
> RSP: 0018:ffff88003fa07dd0  EFLAGS: 00010246
> RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000
> RDX: ffffc20000000160 RSI: 0000000000000000 RDI: 0000000000000003
> RBP: ffff88003fa07e90 R08: 0000000000000000 R09: ffff88003fa07dd0
> R10: 0000000000000001 R11: 0000000000000000 R12: ffff88003fa07dd0
> R13: 0000000000000002 R14: ffffc20000000000 R15: 000000006f57e511
> FS:  0000000000000000(0000) GS:ffffffff80cf6a80(0000) knlGS:0000000000000000
> CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
> CR2: 000000000000008c CR3: 0000000000201000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process swapper (pid: 1, threadinfo ffff88003fa06000, task ffff88003fa08000)
> Stack:  00000000fed00000 ffffc20000000000 0000000100000003 0000000800000002
>  0000000000000000 0000000000000000 0000000000000000 0000000000000000
>  0000000000000000 0000000000000000 0000000000000000 0000000000000000
> Call Trace:
>  [<ffffffff80d227c0>] ? hpet_late_init+0x0/0x1c0
>  [<ffffffff80209045>] do_one_initcall+0x45/0x190
>  [<ffffffff80296f39>] ? register_irq_proc+0x19/0xe0
>  [<ffffffff80d0d140>] ? early_idt_handler+0x0/0x73
>  [<ffffffff80d0dabc>] kernel_init+0x14c/0x1b0
>  [<ffffffff80942ac1>] ? trace_hardirqs_on_thunk+0x3a/0x3f
>  [<ffffffff8020dbd9>] child_rip+0xa/0x11
>  [<ffffffff8020ceee>] ? restore_args+0x0/0x30
>  [<ffffffff80d0d970>] ? kernel_init+0x0/0x1b0
>  [<ffffffff8020dbcf>] ? child_rip+0x0/0x11
> Code: 20 48 83 c1 01 48 39 f1 75 e3 44 89 e8 4c 8b 05 29 29 22 00 31 f6 48 8d 78 01 66 66 90 89 f0 48 8d 04 80 48 c1 e0 05 4a 8d 0c 00 <f6> 81 8c 00 00 00 08 74 26 8b 81 80 00 00 00 8b 91 88 00 00 00
> RIP  [<ffffffff80d228be>] hpet_late_init+0xfe/0x1c0
>  RSP <ffff88003fa07dd0>
> CR2: 000000000000008c
> Kernel panic - not syncing: Fatal exception

There was one code path, with CONFIG_PCI_MSI disabled, where we were accessing
hpet_devs without initialization. That resulted in the above crash. The change
below adds a check for hpet_devs.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 31e9191b7e19..01005aeda7d9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -126,6 +126,24 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	int i;
+
+	if (!hpet_devs)
+		return;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd->hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(hd, hdev->num);
+	}
+}
+
 static void hpet_reserve_platform_timers(unsigned long id)
 {
 	struct hpet __iomem *hpet = hpet_virt_address;
@@ -158,15 +176,7 @@ static void hpet_reserve_platform_timers(unsigned long id)
 			Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
 	}
 
-	for (i = 0; i < nrtimers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
-
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
-
-		hd.hd_irq[hdev->num] = hdev->irq;
-		hpet_reserve_timer(&hd, hdev->num);
-	}
+	hpet_reserve_msi_timers(&hd);
 
 	hpet_alloc(&hd);
 
-- 
cgit v1.2.3-55-g7522


From ba374c9baef910fbc5373541d98c50f15e82c3f8 Mon Sep 17 00:00:00 2001
From: Steven Noonan
Date: Mon, 8 Sep 2008 16:19:09 -0700
Subject: x86: fix HPET compiler error when not using CONFIG_PCI_MSI

Added dummy function for hpet_setup_msi_irq().

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 01005aeda7d9..422c577ef691 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -635,6 +635,10 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
 }
 #else
 
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+	return 0;
+}
 void hpet_msi_capability_lookup(unsigned int start_timer)
 {
 	return;
-- 
cgit v1.2.3-55-g7522


From 87783be4c26d79f5cde245e6e8a9fd2b295e92d7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Wed, 10 Sep 2008 22:19:50 +0400
Subject: x86: io-apic - get rid of __DO_ACTION macro

Replace __DO_ACTION macro with io_apic_modify_irq function.
This allow us to 'grep' definitions being hided by
__DO_ACTION macro:

	__unmask_IO_APIC_irq
	__mask_IO_APIC_irq
	__mask_and_edge_IO_APIC_irq
	__unmask_and_level_IO_APIC_irq

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 103 +++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 12e59df026db..ac47384724e3 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -643,65 +643,66 @@ static void __init replace_pin_at_irq(unsigned int irq,
 		add_pin_to_irq(irq, newapic, newpin);
 }
 
-#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
-									\
-{									\
-	int pin;							\
-	struct irq_cfg *cfg;						\
-	struct irq_pin_list *entry;					\
-									\
-	cfg = irq_cfg(irq);						\
-	entry = cfg->irq_2_pin;						\
-	for (;;) {							\
-		unsigned int reg;					\
-		if (!entry)						\
-			break;						\
-		pin = entry->pin;					\
-		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
-		reg ACTION_DISABLE;					\
-		reg ACTION_ENABLE;					\
-		io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);	\
-		FINAL;							\
-		if (!entry->next)					\
-			break;						\
-		entry = entry->next;					\
-	}								\
-}
-
-#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL)		\
-									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)
-
-/* mask = 0 */
-DO_ACTION(__unmask,	0, |= 0, &= ~IO_APIC_REDIR_MASKED, )
+static inline void io_apic_modify_irq(unsigned int irq,
+				int mask_and, int mask_or,
+				void (*final)(struct irq_pin_list *entry))
+{
+	int pin;
+	struct irq_cfg *cfg;
+	struct irq_pin_list *entry;
+
+	cfg = irq_cfg(irq);
+	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+		unsigned int reg;
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+		reg &= mask_and;
+		reg |= mask_or;
+		io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+		if (final)
+			final(entry);
+	}
+}
+
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
 
 #ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
+void io_apic_sync(struct irq_pin_list *entry)
 {
-	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	/*
+	 * Synchronize the IO-APIC and the CPU by doing
+	 * a dummy read from the IO-APIC
+	 */
+	struct io_apic __iomem *io_apic;
+	io_apic = io_apic_base(entry->apic);
 	readl(&io_apic->data);
 }
 
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic))
-
-#else
-
-/* mask = 1 */
-DO_ACTION(__mask,	0, |= IO_APIC_REDIR_MASKED, &= ~0, )
-
-/* mask = 1, trigger = 0 */
-DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, )
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
 
-/* mask = 0, trigger = 1 */
-DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+			IO_APIC_REDIR_MASKED, NULL);
+}
 
-#endif
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
-- 
cgit v1.2.3-55-g7522


From 823b259b80158a5fb694f6784e18b5bae669c599 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 10 Sep 2008 21:56:46 -0700
Subject: x86: print out apic id in hex format

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c      | 2 +-
 arch/x86/kernel/cpu/amd.c   | 2 +-
 arch/x86/kernel/cpu/intel.c | 2 +-
 arch/x86/kernel/smpboot.c   | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index f3f50858048e..11cb18639581 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1586,7 +1586,7 @@ int __init APIC_init_uniprocessor(void)
 	 */
 	if (!cpu_has_apic &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
 		       boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	}
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 
-	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8c3aca7cb343..f84de2ff933c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
 	int timeout;
 	u32 status;
 
-	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
+		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
 	start_ip = setup_trampoline();
 
 	/* So we see what's up   */
-	printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+	printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
 			  cpu, apicid, start_ip);
 
 	/*
-- 
cgit v1.2.3-55-g7522


From c87695ea74f21d180e1df3a1d00ac3dd432ea03b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sun, 14 Sep 2008 02:33:11 -0700
Subject: dyn_array: use %pF instead of print_fn_descriptor_symbol

... and tidy up the printouts. Suggested by Andrew Morton.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/dyn_array.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/init/dyn_array.c b/init/dyn_array.c
index 778d9d508158..c8d5e2a18588 100644
--- a/init/dyn_array.c
+++ b/init/dyn_array.c
@@ -17,10 +17,9 @@ void __init pre_alloc_dyn_array(void)
 	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
+		printk(KERN_DEBUG "dyn_array %pF size:%#lx nr:%d align:%#lx\n",
+			da->name, da->size, *da->nr, da->align);
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
 		if (da->align > max_align)
 			max_align = da->align;
@@ -40,11 +39,10 @@ void __init pre_alloc_dyn_array(void)
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("dyn_array %s ", da->name);
-
 		phys = roundup(phys, da->align);
+		printk(KERN_DEBUG "dyn_array %pF ==> [%#lx - %#lx]\n",
+			da->name, phys, phys + size);
 		*da->name = phys_to_virt(phys);
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
 		phys += size;
 
@@ -72,10 +70,9 @@ unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
 	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
 		struct dyn_array *da = *daa;
 
+		printk(KERN_DEBUG "per_cpu_dyn_array %pF size:%#lx nr:%d align:%#lx\n",
+			da->name, da->size, *da->nr, da->align);
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-		printk(KERN_CONT "size:%#lx nr:%d align:%#lx\n",
-			da->size, *da->nr, da->align);
 		total_size += roundup(size, da->align);
 		if (da->align > max_align)
 			max_align = da->align;
@@ -103,15 +100,15 @@ void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
 		struct dyn_array *da = *daa;
 
 		size = da->size * (*da->nr);
-		print_fn_descriptor_symbol("per_cpu_dyn_array %s ", da->name);
-
 		phys = roundup(phys, da->align);
+		printk(KERN_DEBUG "per_cpu_dyn_array %pF ==> [%#lx - %#lx]\n",
+			da->name, phys, phys + size);
+
 		addr = (unsigned long)da->name;
 		addr += per_cpu_offset(cpu);
 		array = (void **)addr;
 		*array = phys_to_virt(phys);
 		*da->name = *array; /* so init_work could use it directly */
-		printk(KERN_CONT " ==> [%#lx - %#lx]\n", phys, phys + size);
 
 		phys += size;
 
-- 
cgit v1.2.3-55-g7522


From 9df08f109572e88fbdab9a61d5cb0f0eae4ac9fa Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 14 Sep 2008 11:55:37 +0400
Subject: x86: apic - lapic_setup_esr does not handle esr_disable - fix it

lapic_setup_esr doesn't handle esr_disable inquire.
The error brought in during unification process.
Fix it.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 63 ++++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 11cb18639581..59550cc017dc 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1081,40 +1081,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
+	unsigned int oldvalue, value, maxlvt;
+
+	if (!lapic_is_integrated()) {
+		printk(KERN_INFO "No ESR for 82489DX.\n");
+		return;
+	}
 
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
+	if (esr_disable) {
 		/*
-		 * spec says clear errors after enabling vector.
+		 * Something untraceable is creating bad interrupts on
+		 * secondary quads ... for the moment, just leave the
+		 * ESR disabled - we can't do anything useful with the
+		 * errors anyway - mbligh
 		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
+		printk(KERN_INFO "Leaving ESR disabled.\n");
+		return;
 	}
+
+	maxlvt = lapic_get_maxlvt();
+	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	oldvalue = apic_read(APIC_ESR);
+
+	/* enables sending errors */
+	value = ERROR_APIC_VECTOR;
+	apic_write(APIC_LVTERR, value);
+
+	/*
+	 * spec says clear errors after enabling vector.
+	 */
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+	value = apic_read(APIC_ESR);
+	if (value != oldvalue)
+		apic_printk(APIC_VERBOSE, "ESR value before enabling "
+			"vector: 0x%08x  after: 0x%08x\n",
+			oldvalue, value);
 }
 
 
-- 
cgit v1.2.3-55-g7522


From 08ad776e3c54e6fe8b50939f176538063b69f89e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sun, 14 Sep 2008 11:55:38 +0400
Subject: x86: apic - skip writting ESR register if we dont have on

On 82489DX we don't have ESR register so we should not
write it.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 59550cc017dc..d730e8d31727 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1131,7 +1131,7 @@ void __cpuinit setup_local_APIC(void)
 
 #ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
+	if (lapic_is_integrated() && esr_disable) {
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
-- 
cgit v1.2.3-55-g7522


From b189892de4da4634560657aedd774752094dbfa0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Fri, 12 Sep 2008 23:58:24 +0400
Subject: x86: apic - fix unused vars warning in calibrate_APIC_clock

If we don't have CONFIG_X86_PM_TIMER=y compiler warns about
unused variables. Move PM timer based calibration into a
separate function and make the code cleaner and the compiler
happy as well.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 81 +++++++++++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d730e8d31727..784116933c70 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -538,14 +538,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	}
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+	const long pm_thresh = pm_100ms / 100;
+	unsigned long mult;
+	u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+	return -1;
+#endif
+
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	/* Check, if the PM timer is available */
+	if (!deltapm)
+		return -1;
+
+	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+	if (deltapm > (pm_100ms - pm_thresh) &&
+	    deltapm < (pm_100ms + pm_thresh)) {
+		apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+	} else {
+		res = (((u64)deltapm) *  mult) >> 22;
+		do_div(res, 1000000);
+		printk(KERN_WARNING "APIC calibration not consistent "
+			"with PM Timer: %ldms instead of 100ms\n",
+			(long)res);
+		/* Correct the lapic counter value */
+		res = (((u64)(*delta)) * pm_100ms);
+		do_div(res, deltapm);
+		printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			"%lu (%ld)\n", (unsigned long)res, *delta);
+		*delta = (long)res;
+	}
+
+	return 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
 	void (*real_handler)(struct clock_event_device *dev);
 	unsigned long deltaj;
-	long delta, deltapm;
+	long delta;
 	int pm_referenced = 0;
 
 	local_irq_disable();
@@ -575,36 +612,9 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-#ifdef CONFIG_X86_PM_TIMER
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) *  mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-			       "with PM Timer: %ldms instead of 100ms\n",
-			       (long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-			       "%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
-#endif
+	/* we trust the PM based calibration if possible */
+	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+					&delta);
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -646,7 +656,10 @@ static int __init calibrate_APIC_clock(void)
 
 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-	/* We trust the pm timer based calibration */
+	/*
+	 * PM timer calibration failed or not turned on
+	 * so lets try APIC timer based calibration
+	 */
 	if (!pm_referenced) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
-- 
cgit v1.2.3-55-g7522


From 56ffa1a028b9fce3860a247c6fe79fce7cbf425b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Sat, 13 Sep 2008 13:11:16 +0400
Subject: x86: io-apic - do not use KERN_DEBUG marker too much, fix

Yinghai Lu reported:

| >  0 add_pin_to_irq: irq 15 --> apic 0 pin 15
| > IOAPIC[0]: Set routing entry (8-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
| >  8-16 8-17 8-18 8-19 8-20 8-21 8-22 8-23 (apicid-pin) not connected
| >  9-0 9-1 9-2 9-3 9-4 9-5 9-6 9-7 9-8 9-9 9-10 9-11 9-12 9-13 9-14 9-15
| > 9-16 9-17 9-18 9-19 9-20 9-21 9-22 9-23 (apicid-pin) not connected
| >
|
| only first one not connected at first, and ...

here is a quick fix for this.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index ac47384724e3..6ce5873a406c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1541,6 +1541,11 @@ static void __init setup_IO_APIC_irqs(void)
 						pin);
 				continue;
 			}
+			if (notcon) {
+				apic_printk(APIC_VERBOSE,
+					" (apicid-pin) not connected\n");
+				notcon = 0;
+			}
 
 			irq = pin_2_irq(idx, apic, pin);
 #ifdef CONFIG_X86_32
@@ -1552,11 +1557,6 @@ static void __init setup_IO_APIC_irqs(void)
 			setup_IO_APIC_irq(apic, pin, irq,
 					irq_trigger(idx), irq_polarity(idx));
 		}
-		if (notcon) {
-			apic_printk(APIC_VERBOSE,
-				" (apicid-pin) not connected\n");
-			notcon = 0;
-		}
 	}
 
 	if (notcon)
-- 
cgit v1.2.3-55-g7522


From e00585bb7fc3d0b601181b765a254df7ff4ea59b Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Mon, 15 Sep 2008 01:53:50 -0700
Subject: irq: fix irqpoll && sparseirq

Steven Noonan reported a boot hang when using irqpoll and
CONFIG_HAVE_SPARSE_IRQ=y.

The irqpoll loop needs to be updated to not iterate from 1 to nr_irqs
but to iterate via for_each_irq_desc(). (in the former case desc can
be NULL which crashes the box)

Reported-by: Steven Noonan <steven@uplinklabs.net>
Tested-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/spurious.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b5d906002e1d..ec5a4bef3054 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -90,14 +90,15 @@ static int misrouted_irq(int irq)
 {
 	int i;
 	int ok = 0;
+	struct irq_desc *desc;
 
-	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc;
+	for_each_irq_desc(i, desc) {
+		if (!i)
+			 continue;
 
 		if (i == irq)	/* Already tried */
 			continue;
 
-		desc = irq_to_desc(i);
 		if (try_one_irq(i, desc))
 			ok = 1;
 	}
@@ -108,10 +109,14 @@ static int misrouted_irq(int irq)
 static void poll_spurious_irqs(unsigned long dummy)
 {
 	int i;
-	for (i = 1; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
+		if (!i)
+			 continue;
+
 		/* Racy but it doesn't matter */
 		status = desc->status;
 		barrier();
@@ -278,7 +283,7 @@ static int __init irqfixup_setup(char *str)
 
 __setup("irqfixup", irqfixup_setup);
 module_param(irqfixup, int, 0644);
-MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {
-- 
cgit v1.2.3-55-g7522


From 9d98598d2fc286c8dbcd0b681168639528428db0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 19 Sep 2008 00:12:26 -0700
Subject: sparseirq: remove some debug print out

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 19 -------------------
 kernel/irq/handle.c       | 18 ------------------
 2 files changed, 37 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6ce5873a406c..01419fdc1d5d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -277,23 +277,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 
 	spin_unlock_irqrestore(&irq_cfg_lock, flags);
 
-	printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_cfg *cfg;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_cfg);
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-		for_each_irq_cfg(cfg) {
-			phys = __pa(cfg);
-			printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
 	return cfg;
 }
 #else
@@ -597,7 +580,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
-		printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 		return;
 	}
 
@@ -613,7 +595,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
-	printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index eae69373a9c6..710db0ec97e3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -238,24 +238,6 @@ struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 
 	spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
-	printk(KERN_DEBUG "found new irq_desc for irq %d\n", desc->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-	{
-		/* dump the results */
-		struct irq_desc *desc;
-		unsigned long phys;
-		unsigned long bytes = sizeof(struct irq_desc);
-		unsigned int irqx;
-
-		printk(KERN_DEBUG "=========================== %d\n", irq);
-		printk(KERN_DEBUG "irq_desc dump after get that for %d\n", irq);
-		for_each_irq_desc(irqx, desc) {
-			phys = __pa(desc);
-			printk(KERN_DEBUG "irq_desc %d ==> [%#lx - %#lx]\n", irqx, phys, phys + bytes);
-		}
-		printk(KERN_DEBUG "===========================\n");
-	}
-#endif
 	return desc;
 }
 #else
-- 
cgit v1.2.3-55-g7522


From 2976fe20125587c944c8df48d991c38f0891fb28 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Thu, 18 Sep 2008 16:10:48 -0700
Subject: fix warning: "x86: sparse_irq needs spin_lock in allocations"

caused by

commit a532e19680ada3b8579b81e67e76d3ebd19c340f
Author: Yinghai Lu <yhlu.kernel@gmail.com>
Date:   Wed Aug 20 20:46:25 2008 -0700

    x86: sparse_irq needs spin_lock in allocations

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 710db0ec97e3..7f625fbc9aa4 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,12 +111,11 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
+#ifdef CONFIG_HAVE_SPARSE_IRQ
 /*
  * Protect the sparse_irqs_free freelist:
  */
 static DEFINE_SPINLOCK(sparse_irq_lock);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
 static struct irq_desc *sparse_irqs_free;
 struct irq_desc *sparse_irqs;
 #endif
-- 
cgit v1.2.3-55-g7522


From 5ffa4eb2224343ec3fbd492ffe71e28e797435b7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Thu, 18 Sep 2008 23:37:57 +0400
Subject: x86: io-apic - interrupt remapping fix

Interrupt remapping could lead to NULL dereference in case of
kzalloc failed and memory leak in other way. So fix the
both cases.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c    | 13 ++++++++++---
 arch/x86/kernel/io_apic.c | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 784116933c70..1f48bd1c9f2d 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1360,7 +1360,12 @@ void enable_IR_x2apic(void)
 
 	local_irq_save(flags);
 	mask_8259A();
-	save_mask_IO_APIC_setup();
+
+	ret = save_mask_IO_APIC_setup();
+	if (ret) {
+		printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+		goto end;
+	}
 
 	ret = enable_intr_remapping(1);
 
@@ -1370,14 +1375,15 @@ void enable_IR_x2apic(void)
 	}
 
 	if (ret)
-		goto end;
+		goto end_restore;
 
 	if (!x2apic) {
 		x2apic = 1;
 		apic_ops = &x2apic_ops;
 		enable_x2apic();
 	}
-end:
+
+end_restore:
 	if (ret)
 		/*
 		 * IR enabling failed
@@ -1386,6 +1392,7 @@ end:
 	else
 		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
 
+end:
 	unmask_8259A();
 	local_irq_restore(flags);
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 01419fdc1d5d..4cc9cb64c811 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -812,7 +812,7 @@ int save_mask_IO_APIC_setup(void)
 			kzalloc(sizeof(struct IO_APIC_route_entry) *
 				nr_ioapic_registers[apic], GFP_KERNEL);
 		if (!early_ioapic_entries[apic])
-			return -ENOMEM;
+			goto nomem;
 	}
 
 	for (apic = 0; apic < nr_ioapics; apic++)
@@ -826,17 +826,32 @@ int save_mask_IO_APIC_setup(void)
 				ioapic_write_entry(apic, pin, entry);
 			}
 		}
+
 	return 0;
+
+nomem:
+	for (; apic > 0; apic--)
+		kfree(early_ioapic_entries[apic]);
+	kfree(early_ioapic_entries[apic]);
+	memset(early_ioapic_entries, 0,
+		ARRAY_SIZE(early_ioapic_entries));
+
+	return -ENOMEM;
 }
 
 void restore_IO_APIC_setup(void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++)
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		if (!early_ioapic_entries[apic])
+			break;
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 			ioapic_write_entry(apic, pin,
 					   early_ioapic_entries[apic][pin]);
+		kfree(early_ioapic_entries[apic]);
+		early_ioapic_entries[apic] = NULL;
+	}
 }
 
 void reinit_intr_remapped_IO_APIC(int intr_remapping)
-- 
cgit v1.2.3-55-g7522


From 8da077d6f31da291ee3a7dd559671cb8ca48cbe2 Mon Sep 17 00:00:00 2001
From: Jack Steiner
Date: Tue, 23 Sep 2008 08:42:05 -0500
Subject: x86, uv: fix ordering of calls to uv_system_init & uv_cpu_init

Fix problem caused by reordering of the calls to uv_cpu_init() &
uv_system_init. Originally, uv_cpu_init() was called AFTER uv_system_init.
This order was recently broken as a side-effect of other patches.

With this patch, initialization of cpu 0 is now done by the system_init
call.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..b689075bbe2f 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * 	ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+	/* CPU 0 initilization will be done via uv_system_init. */
+	if (!uv_blade_info)
+		return;
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
 
 void __init uv_system_init(void)
 {
@@ -448,21 +463,6 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_config_high(max_pnode);
 	map_mmioh_high(max_pnode);
-	uv_system_inited = true;
-}
 
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-	BUG_ON(!uv_system_inited);
-
-	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-		set_x2apic_extra_bits(uv_hub_info->pnode);
+	uv_cpu_init();
 }
-
-
-- 
cgit v1.2.3-55-g7522


From aac3f2b6f6e88827432c4050ac73552f24b19de1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 24 Sep 2008 19:04:35 -0700
Subject: x86: fix typo in irq_desc array

when SPARSE_IRQ is not used, should still use irq_desc->lock

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/irq/handle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 7f625fbc9aa4..fb6bdb602a93 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -253,7 +253,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = __SPIN_LOCK_UNLOCKED(sparse_irqs->lock),
+		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
 #ifdef CONFIG_SMP
 		.affinity = CPU_MASK_ALL
 #endif
-- 
cgit v1.2.3-55-g7522


From 7564676813780e0ba4dacf95338202cb72d2172d Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 24 Sep 2008 19:04:36 -0700
Subject: x86: irq no should not use hex in /proc/interrupts

Arjan van de Ven noticed that we changed IRQ numbers from decimal
to hex in /proc/interrupts - that can break user-space utilities
like irqbalanced.

Reported-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_32.c | 2 +-
 arch/x86/kernel/irq_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b2e1082cf5ad..001772ffc918 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -307,7 +307,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%#x: ",i);
+		seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index c2fca89a3f7e..ec2661091283 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		action = desc->action;
 		if (!action && !any_count)
 			goto skip;
-		seq_printf(p, "%#x: ",i);
+		seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-- 
cgit v1.2.3-55-g7522


From c1370b49cc4f09de5b447ddf688a3988a297dbfc Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Tue, 23 Sep 2008 23:00:02 +0400
Subject: x86: io-apic - interrupt remapping fix

Clean up obscure for() cycle with straight while() form

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4cc9cb64c811..ed68a6f99dc2 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -830,9 +830,8 @@ int save_mask_IO_APIC_setup(void)
 	return 0;
 
 nomem:
-	for (; apic > 0; apic--)
-		kfree(early_ioapic_entries[apic]);
-	kfree(early_ioapic_entries[apic]);
+	while (apic >= 0)
+		kfree(early_ioapic_entries[apic--]);
 	memset(early_ioapic_entries, 0,
 		ARRAY_SIZE(early_ioapic_entries));
 
-- 
cgit v1.2.3-55-g7522


From c81bba49a13cb3376654d0cc93dc069fd619ed76 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Thu, 25 Sep 2008 11:53:11 -0700
Subject: x86: print out irq nr for msi/ht, v3

v2: fix hpet compiling error
v3: Bjorn want to use dev_printk instead

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c    | 3 +++
 arch/x86/kernel/io_apic.c | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 422c577ef691..2913913f4a46 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -467,6 +467,9 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
 	enable_irq(dev->irq);
 
+	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+			 dev->name, dev->irq);
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index ed68a6f99dc2..4ee270d30358 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3354,6 +3354,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 #endif
 		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
+	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
 	return 0;
 }
 
@@ -3586,6 +3588,7 @@ int arch_setup_hpet_msi(unsigned int irq)
 	hpet_msi_write(irq, &msg);
 	set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
 		"edge");
+
 	return 0;
 }
 #endif
@@ -3682,6 +3685,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
+
+		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
 	return err;
 }
-- 
cgit v1.2.3-55-g7522


From 5f79f2f2ad39b5177c52ed08ffd066ea0c1da924 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi
Date: Wed, 24 Sep 2008 10:03:17 -0700
Subject: hpet: clean up warning

Fix the below compile warnings due to recent HPET MSI changes

arch/x86/kernel/hpet.c:48: warning: 'hpet_devs' defined but not used
arch/x86/kernel/hpet.c:50: warning: 'per_cpu__cpu_hpet_dev' defined but not used

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 57 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 2913913f4a46..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -45,10 +45,6 @@ struct hpet_dev {
 	char				name[10];
 };
 
-static struct hpet_dev			*hpet_devs;
-
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-
 unsigned long hpet_readl(unsigned long a)
 {
 	return readl(hpet_virt_address + a);
@@ -126,23 +122,8 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
-	int i;
 
-	if (!hpet_devs)
-		return;
-
-	for (i = 0; i < hpet_num_timers; i++) {
-		struct hpet_dev *hdev = &hpet_devs[i];
-
-		if (!(hdev->flags & HPET_DEV_VALID))
-			continue;
-
-		hd->hd_irq[hdev->num] = hdev->irq;
-		hpet_reserve_timer(hd, hdev->num);
-	}
-}
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
 
 static void hpet_reserve_platform_timers(unsigned long id)
 {
@@ -362,6 +343,10 @@ static int hpet_legacy_next_event(unsigned long delta,
  * HPET MSI Support
  */
 #ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev	*hpet_devs;
+
 void hpet_msi_unmask(unsigned int irq)
 {
 	struct hpet_dev *hdev = get_irq_data(irq);
@@ -525,7 +510,8 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 #else
 #define RESERVE_TIMERS 0
 #endif
-void hpet_msi_capability_lookup(unsigned int start_timer)
+
+static void hpet_msi_capability_lookup(unsigned int start_timer)
 {
 	unsigned int id;
 	unsigned int num_timers;
@@ -571,6 +557,26 @@ void hpet_msi_capability_lookup(unsigned int start_timer)
 		num_timers, num_timers_used);
 }
 
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+	int i;
+
+	if (!hpet_devs)
+		return;
+
+	for (i = 0; i < hpet_num_timers; i++) {
+		struct hpet_dev *hdev = &hpet_devs[i];
+
+		if (!(hdev->flags & HPET_DEV_VALID))
+			continue;
+
+		hd->hd_irq[hdev->num] = hdev->irq;
+		hpet_reserve_timer(hd, hdev->num);
+	}
+}
+#endif
+
 static struct hpet_dev *hpet_get_unused_timer(void)
 {
 	int i;
@@ -642,10 +648,17 @@ static int hpet_setup_msi_irq(unsigned int irq)
 {
 	return 0;
 }
-void hpet_msi_capability_lookup(unsigned int start_timer)
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+	return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
 {
 	return;
 }
+#endif
 
 static int hpet_cpuhp_notify(struct notifier_block *n,
 		unsigned long action, void *hcpu)
-- 
cgit v1.2.3-55-g7522


From 4173a0e7371ece227559b44943c6fd456ee470d1 Mon Sep 17 00:00:00 2001
From: Dean Nelson
Date: Thu, 2 Oct 2008 12:18:21 -0500
Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3

Provide a means for UV interrupt MMRs to be setup with the message to be sent
when an MSI is raised.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile    |  2 +-
 arch/x86/kernel/io_apic.c   | 68 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/uv_irq.c    | 77 +++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/uv_irq.h | 36 +++++++++++++++++++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/uv_irq.c
 create mode 100644 include/asm-x86/uv/uv_irq.h

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 45cecc59d894..acc0e8bf043e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o
+	obj-y				+= bios_uv.o uv_irq.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 4ee270d30358..260c95a5e6dc 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -58,6 +58,8 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -3692,6 +3694,72 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset)
+{
+	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long flags;
+	int err;
+
+	err = assign_irq_vector(irq, *eligible_cpu);
+	if (err != 0)
+		return err;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	cfg = irq_cfg(irq);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = INT_DELIVERY_MODE;
+	entry->dest_mode = INT_DEST_MODE;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int mmr_pnode;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+	entry->mask = 1;
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
+
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..6bd26c91c30b
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,77 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <asm/uv/uv_irq.h>
+
+static void uv_noop(unsigned int irq)
+{
+}
+
+static unsigned int uv_noop_ret(unsigned int irq)
+{
+	return 0;
+}
+
+static void uv_ack_apic(unsigned int irq)
+{
+	ack_APIC_irq();
+}
+
+struct irq_chip uv_irq_chip = {
+	.name		= "UV-CORE",
+	.startup	= uv_noop_ret,
+	.shutdown	= uv_noop,
+	.enable		= uv_noop,
+	.disable	= uv_noop,
+	.ack		= uv_noop,
+	.mask		= uv_noop,
+	.unmask		= uv_noop,
+	.eoi		= uv_ack_apic,
+	.end		= uv_noop,
+};
+
+/*
+ * Set up a mapping of an available irq and vector, and enable the specified
+ * MMR that defines the MSI that is to be sent to the specified CPU when an
+ * interrupt is raised.
+ */
+int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
+		 unsigned long mmr_offset)
+{
+	int irq;
+	int ret;
+
+	irq = create_irq();
+	if (irq <= 0)
+		return -EBUSY;
+
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
+	if (ret != irq)
+		destroy_irq(irq);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uv_setup_irq);
+
+/*
+ * Tear down a mapping of an irq and vector, and disable the specified MMR that
+ * defined the MSI that was to be sent to the specified CPU when an interrupt
+ * was raised.
+ *
+ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
+ */
+void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+{
+	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/include/asm-x86/uv/uv_irq.h b/include/asm-x86/uv/uv_irq.h
new file mode 100644
index 000000000000..8bf5f32da9c6
--- /dev/null
+++ b/include/asm-x86/uv/uv_irq.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef ASM_X86__UV__UV_IRQ_H
+#define ASM_X86__UV__UV_IRQ_H
+
+/* If a generic version of this structure gets defined, eliminate this one. */
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+
+extern struct irq_chip uv_irq_chip;
+
+extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern void arch_disable_uv_irq(int, unsigned long);
+
+extern int uv_setup_irq(char *, int, int, unsigned long);
+extern void uv_teardown_irq(unsigned int, int, unsigned long);
+
+#endif /* ASM_X86__UV__UV_IRQ_H */
-- 
cgit v1.2.3-55-g7522


From 37762b6ffb37f9e21cbc6f80902aa06b7a053fd7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 3 Oct 2008 11:38:37 +0200
Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3, fix

fix:

 arch/x86/kernel/uv_irq.c: In function 'uv_ack_apic':
 arch/x86/kernel/uv_irq.c:26: error: implicit declaration of function 'ack_APIC_irq'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/uv_irq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 6bd26c91c30b..aeef529917e4 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -10,6 +10,8 @@
 
 #include <linux/module.h>
 #include <linux/irq.h>
+
+#include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
 
 static void uv_noop(unsigned int irq)
-- 
cgit v1.2.3-55-g7522


From a50f70b17541c0060967c6df61133e968bad3652 Mon Sep 17 00:00:00 2001
From: Russ Anderson
Date: Fri, 3 Oct 2008 11:58:54 -0500
Subject: x86: Add UV EFI table entry v4

Look for a UV entry in the EFI tables.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/efi.c | 4 ++++
 include/linux/efi.h   | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,6 +366,10 @@ void __init efi_init(void)
 					SMBIOS_TABLE_GUID)) {
 			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx ", config_tables[i].table);
+		} else if (!efi_guidcmp(config_tables[i].guid,
+					UV_SYSTEM_TABLE_GUID)) {
+			efi.uv_systab = config_tables[i].table;
+			printk(" UVsystab=0x%lx ", config_tables[i].table);
 		} else if (!efi_guidcmp(config_tables[i].guid,
 					HCDP_TABLE_GUID)) {
 			efi.hcdp = config_tables[i].table;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 807373d467f7..bb66feb164bd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz
 #define EFI_GLOBAL_VARIABLE_GUID \
     EFI_GUID(  0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c )
 
+#define UV_SYSTEM_TABLE_GUID \
+    EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -255,6 +258,7 @@ extern struct efi {
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
+	unsigned long uv_systab;	/* UV system table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
-- 
cgit v1.2.3-55-g7522


From 7f5942329e0787087a5e4dced838cee711ac2b58 Mon Sep 17 00:00:00 2001
From: Russ Anderson
Date: Fri, 3 Oct 2008 11:59:15 -0500
Subject: x86: Add UV bios call infrastructure v4

Add the EFI callback function and associated wrapper code.
Initialize SAL system table entry info at boot time.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/bios_uv.c        | 101 +++++++++++++++++++++++++++++++--------
 arch/x86/kernel/genx2apic_uv_x.c |   1 +
 include/asm-x86/efi.h            |  13 +++++
 include/asm-x86/uv/bios.h        |  73 +++++++++++++++-------------
 4 files changed, 135 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..5481eb59f783 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,94 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <linux/io.h>
 #include <asm/uv/bios.h>
 
-const char *
-x86_bios_strerror(long status)
+struct uv_systab uv_systab;
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-	const char *str;
-	switch (status) {
-	case  0: str = "Call completed without error";	break;
-	case -1: str = "Not implemented";		break;
-	case -2: str = "Invalid argument";		break;
-	case -3: str = "Call completed with error";	break;
-	default: str = "Unknown BIOS status code";	break;
-	}
-	return str;
+	struct uv_systab *tab = &uv_systab;
+
+	if (!tab->function)
+		/*
+		 * BIOS does not support UV systab
+		 */
+		return BIOS_STATUS_UNIMPLEMENTED;
+
+	return efi_call6((void *)__va(tab->function),
+					(u64)which, a1, a2, a3, a4, a5);
 }
 
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info)
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
 {
-	struct uv_bios_retval isrv;
+	unsigned long bios_flags;
+	s64 ret;
+
+	local_irq_save(bios_flags);
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	local_irq_restore(bios_flags);
+
+	return ret;
+}
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+					u64 a4, u64 a5)
+{
+	s64 ret;
+
+	preempt_disable();
+	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	preempt_enable();
 
-	BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-	*ticks_per_second = isrv.v0;
-	*drift_info = isrv.v1;
-	return isrv.status;
+	return ret;
+}
+
+long
+x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second,
+					unsigned long *drift_info)
+{
+	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+				(u64)ticks_per_second, 0, 0, 0);
 }
 EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+	struct uv_systab *tab;
+
+	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+	    (efi.uv_systab == (unsigned long)NULL)) {
+		printk(KERN_CRIT "No EFI UV System Table.\n");
+		uv_systab.function = (unsigned long)NULL;
+		return;
+	}
+
+	tab = (struct uv_systab *)ioremap(efi.uv_systab,
+					sizeof(struct uv_systab));
+	if (strncmp(tab->signature, "UVST", 4) != 0)
+		printk(KERN_ERR "bad signature in UV system table!");
+
+	/*
+	 * Copy table to permanent spot for later use.
+	 */
+	memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+	iounmap(tab);
+
+	printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+}
+#else	/* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
+
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b689075bbe2f..aa2e5e15bf69 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -427,6 +427,7 @@ void __init uv_system_init(void)
 	gnode_upper = (((unsigned long)node_id.s.node_id) &
 		       ~((1 << n_val) - 1)) << m_val;
 
+	uv_bios_init();
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index ed2de22e8705..313438e63348 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -94,4 +94,17 @@ extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
+#ifndef CONFIG_EFI
+/*
+ * IF EFI is not configured, have the EFI calls return -ENOSYS.
+ */
+#define efi_call0(_f)					(-ENOSYS)
+#define efi_call1(_f, _a1)				(-ENOSYS)
+#define efi_call2(_f, _a1, _a2)				(-ENOSYS)
+#define efi_call3(_f, _a1, _a2, _a3)			(-ENOSYS)
+#define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)
+#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)
+#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS)
+#endif /* CONFIG_EFI */
+
 #endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 7cd6d7ec1308..f63e46e5337c 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -2,9 +2,7 @@
 #define ASM_X86__UV__BIOS_H
 
 /*
- * BIOS layer definitions.
- *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * UV BIOS layer definitions.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,50 +17,61 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
 #include <linux/rtc.h>
 
-#define BIOS_FREQ_BASE			0x01000001
+/*
+ * Values for the BIOS calls.  It is passed as the first * argument in the
+ * BIOS call.  Passing any other value in the first argument will result
+ * in a BIOS_STATUS_UNIMPLEMENTED return status.
+ */
+enum uv_bios_cmd {
+	UV_BIOS_COMMON,
+	UV_BIOS_GET_SN_INFO,
+	UV_BIOS_FREQ_BASE
+};
 
+/*
+ * Status values returned from a BIOS call.
+ */
 enum {
-	BIOS_FREQ_BASE_PLATFORM = 0,
-	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
-	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+	BIOS_STATUS_SUCCESS		=  0,
+	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
+	BIOS_STATUS_EINVAL		= -EINVAL,
+	BIOS_STATUS_UNAVAIL		= -EBUSY
 };
 
-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)		\
-	do {								\
-		/* XXX - the real call goes here */			\
-		result.status = BIOS_STATUS_UNIMPLEMENTED;		\
-		isrv.v0 = 0;						\
-		isrv.v1 = 0;						\
-	} while (0)
+/*
+ * The UV system table describes specific firmware
+ * capabilities available to the Linux kernel at runtime.
+ */
+struct uv_systab {
+	char signature[4];	/* must be "UVST" */
+	u32 revision;		/* distinguish different firmware revs */
+	u64 function;		/* BIOS runtime callback function ptr */
+};
 
 enum {
-	BIOS_STATUS_SUCCESS		=  0,
-	BIOS_STATUS_UNIMPLEMENTED	= -1,
-	BIOS_STATUS_EINVAL		= -2,
-	BIOS_STATUS_ERROR		= -3
+	BIOS_FREQ_BASE_PLATFORM = 0,
+	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
-struct uv_bios_retval {
-	/*
-	 * A zero status value indicates call completed without error.
-	 * A negative status value indicates reason of call failure.
-	 * A positive status value indicates success but an
-	 * informational value should be printed (e.g., "reboot for
-	 * change to take effect").
-	 */
-	s64 status;
-	u64 v0;
-	u64 v1;
-	u64 v2;
-};
+/*
+ * bios calls have 6 parameters
+ */
+extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+
+extern void uv_bios_init(void);
 
 extern long
 x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
 		   unsigned long *drift_info);
-extern const char *x86_bios_strerror(long status);
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
cgit v1.2.3-55-g7522


From 922402f15a85f7a064926eb1db68cc52bc4d4a91 Mon Sep 17 00:00:00 2001
From: Russ Anderson
Date: Fri, 3 Oct 2008 11:59:33 -0500
Subject: x86: Add UV partition call v4

Add a bios call to return partitioning related info.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/bios_uv.c        | 44 +++++++++++++++++++++++++++++++++++-----
 arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++------
 include/asm-x86/uv/bios.h        | 22 +++++++++++++++++---
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 5481eb59f783..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -23,6 +23,7 @@
 #include <asm/efi.h>
 #include <linux/io.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
 
 struct uv_systab uv_systab;
 
@@ -65,14 +66,47 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 	return ret;
 }
 
-long
-x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second,
-					unsigned long *drift_info)
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+		long *region)
+{
+	s64 ret;
+	u64 v0, v1;
+	union partition_info_u part;
+
+	ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+				(u64)(&v0), (u64)(&v1), 0, 0);
+	if (ret != BIOS_STATUS_SUCCESS)
+		return ret;
+
+	part.val = v0;
+	if (uvtype)
+		*uvtype = part.hub_version;
+	if (partid)
+		*partid = part.partition_id;
+	if (coher)
+		*coher = part.coherence_id;
+	if (region)
+		*region = part.region_size;
+	return ret;
+}
+
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
 {
 	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
-				(u64)ticks_per_second, 0, 0, 0);
+			   (u64)ticks_per_second, 0, 0, 0);
 }
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
 
 
 #ifdef CONFIG_EFI
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index aa2e5e15bf69..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
 
 static __init void uv_rtc_init(void)
 {
-	long status, ticks_per_sec, drift;
+	long status;
+	u64 ticks_per_sec;
 
-	status =
-	    x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
-					&drift);
-	if (status != 0 || ticks_per_sec < 100000) {
+	status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+					&ticks_per_sec);
+	if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
 		printk(KERN_WARNING
 			"unable to determine platform RTC clock frequency, "
 			"guessing.\n");
@@ -428,6 +428,8 @@ void __init uv_system_init(void)
 		       ~((1 << n_val) - 1)) << m_val;
 
 	uv_bios_init();
+	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+			    &uv_coherency_id, &uv_region_size);
 	uv_rtc_init();
 
 	for_each_present_cpu(cpu) {
@@ -449,7 +451,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index f63e46e5337c..3b305d897c84 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -61,6 +61,16 @@ enum {
 	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
+union partition_info_u {
+	u64	val;
+	struct {
+		u64	hub_version	:  8,
+			partition_id	: 16,
+			coherence_id	: 16,
+			region_size	: 24;
+	};
+};
+
 /*
  * bios calls have 6 parameters
  */
@@ -68,10 +78,16 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_freq_base(u64, u64 *);
+
 extern void uv_bios_init(void);
 
-extern long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info);
+extern int uv_type;
+extern long sn_partition_id;
+extern long uv_coherency_id;
+extern long uv_region_size;
+#define partition_coherence_id()	(uv_coherency_id)
+
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
cgit v1.2.3-55-g7522


From fc8c2d763bacc02962048fa042e287debb1416aa Mon Sep 17 00:00:00 2001
From: Russ Anderson
Date: Fri, 3 Oct 2008 11:59:50 -0500
Subject: x86: Add sysfs entries for UV v4

Create /sys/firmware/sgi_uv sysfs entries for partition_id and coherence_id.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile   |  2 +-
 arch/x86/kernel/uv_sysfs.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/bios.h  |  1 +
 3 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/uv_sysfs.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index acc0e8bf043e..cb6ade443f00 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o uv_irq.o
+	obj-y				+= bios_uv.o uv_irq.o uv_sysfs.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 000000000000..67f9b9dbf800
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
+ */
+
+#include <linux/sysdev.h>
+#include <asm/uv/bios.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+	__ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+	__ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+static int __init sgi_uv_sysfs_init(void)
+{
+	unsigned long ret;
+
+	if (!sgi_uv_kobj)
+		sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+	if (!sgi_uv_kobj) {
+		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
+		return -EINVAL;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
+		return ret;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
+		return ret;
+	}
+
+	return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 3b305d897c84..215f1969c266 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -89,5 +89,6 @@ extern long uv_coherency_id;
 extern long uv_region_size;
 #define partition_coherence_id()	(uv_coherency_id)
 
+extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
 #endif /* ASM_X86__UV__BIOS_H */
-- 
cgit v1.2.3-55-g7522


From 4c66a73f0796dacc2ff0d4af75794ec843ceb3d1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Sat, 4 Oct 2008 15:52:15 -0700
Subject: x86: sparse_irq: fix typo in debug print out

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 260c95a5e6dc..f959acbc0db2 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -257,7 +257,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 			panic("please boot with nr_irq_cfg= %d\n", count * 2);
 
 		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+		printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
 
 		for (i = 0; i < nr_irq_cfg; i++)
 			init_one_irq_cfg(&cfg[i]);
-- 
cgit v1.2.3-55-g7522


From 81608f3c254512b906ab78082ec5966b376aacd5 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Fri, 10 Oct 2008 19:00:17 +0400
Subject: x86: apic - unify APIC_DIVISOR

Use APIC_DIVISOR being set to 16 for both 32/64bit
mode. To escape APIC timer underflow during calibration
set it to the maximum possible value.

Also typo error (CONFG instead of proper CONFIG) fixed.
The error was caught by Venkatesh Pallipadi, thanks a lot Venkatesh!

See details on http://lkml.org/lkml/2008/10/9/425

Reported-by: Venkatesh Pallipad <venkatesh.pallipadi@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 1f48bd1c9f2d..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -332,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -592,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
 	global_clock_event->event_handler = lapic_cal_handler;
 
 	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * Setup the APIC counter to maximum. There is no way the lapic
 	 * can underflow in the 100ms detection time frame
 	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
+	__setup_APIC_LVTT(0xffffffff, 0, 0);
 
 	/* Let the interrupts run */
 	local_irq_enable();
-- 
cgit v1.2.3-55-g7522


From 7ef0c30dbf96a8d9a234e90c248eb19df3c031be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 13:07:35 +0200
Subject: genirq: define nr_irqs for architectures with GENERIC_HARDIRQS=n

Revert the sparse irq changes in m68k/s390/sparc and just define
nr_irqs as NR_IRQS for those architectures.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/m68k/kernel/ints.c   | 3 ---
 arch/s390/kernel/irq.c    | 3 ---
 arch/sparc/kernel/irq.c   | 4 ----
 include/linux/interrupt.h | 8 +++++---
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 44169e4cd91d..7e8a0d394e61 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -46,9 +46,6 @@
 #include <asm/q40ints.h>
 #endif
 
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 extern u32 auto_irqhandler_fixup[];
 extern u32 user_irqhandler_fixup[];
 extern u16 user_irqvec_fixup[];
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3624c4a0037a..e7c5bfb7c755 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -17,9 +17,6 @@
 #include <linux/proc_fs.h>
 #include <linux/profile.h>
 
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 /*
  * show_interrupts is needed by /proc/interrupts.
  */
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 4b99e3ce3916..93e1d1c65290 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -55,10 +55,6 @@
 #define SMP_NOP2
 #define SMP_NOP3
 #endif /* SMP */
-
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL(nr_irqs);
-
 unsigned long __raw_local_irq_save(void)
 {
 	unsigned long retval;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d4039a0b23f4..5a57df2ee922 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,11 +15,13 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-extern int nr_irqs;
-
 #ifndef CONFIG_GENERIC_HARDIRQS
-#define for_each_irq_desc(irq, desc)		\
+# define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
+
+# define nr_irqs		NR_IRQS
+#else
+extern int nr_irqs;
 #endif
 
 /*
-- 
cgit v1.2.3-55-g7522


From 3235e936c0cc3589309280b6f59e5096779adae3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 13:16:00 +0200
Subject: x86: remove sparse irq from Kconfig

This code is not ready yet.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 600584b7a497..8636ddf2f4a4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,17 +237,6 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
-config HAVE_SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on PCI_MSI || HT_IRQ
-	default y
-	help
-	  This enables support for sparse irq, esp for msi/msi-x. the irq
-	  number will be bus/dev/fn + 12bit. You may need if you have lots of
-	  cards supports msi-x installed.
-
-	  If you don't know what to do here, say Y.
-
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
-- 
cgit v1.2.3-55-g7522


From 70dd4d992ab324a59cdcd6bedc3f4e729863d514 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 15:39:27 +0200
Subject: genirq: consolidate nr_irqs and for_each_irq_desc()

Move all of those to linux/irq.h where they belong.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  9 ---------
 include/linux/irq.h       | 17 ++++++++++++-----
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5a57df2ee922..58ff4e74b2f3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -15,15 +15,6 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0; irq < nr_irqs; irq++)
-
-# define nr_irqs		NR_IRQS
-#else
-extern int nr_irqs;
-#endif
-
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 93fe9a943e71..dbe8734ae86c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -11,6 +11,18 @@
 
 #include <linux/smp.h>
 
+#ifndef CONFIG_GENERIC_HARDIRQS
+# define nr_irqs		NR_IRQS
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+#endif
+
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
@@ -204,11 +216,6 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-#define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc = &irq_desc[irq])
-#endif
-
 #else
 
 extern struct irq_desc *sparse_irqs;
-- 
cgit v1.2.3-55-g7522


From c6b7674f323622d86316bf7951ad9cae1ce24642 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 14:31:29 +0200
Subject: genirq: use inline function for irq_to_desc

For the non sparse irq case an inline function is perfectly fine.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 15 +++++++++++++--
 kernel/irq/handle.c | 14 --------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index dbe8734ae86c..7d1adacaadb4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -204,8 +204,6 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
 
@@ -216,8 +214,21 @@ extern struct irq_desc irq_desc[NR_IRQS];
 extern struct irq_desc *irq_desc;
 #endif
 
+static inline struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < nr_irqs) ? irq_desc + irq : NULL;
+}
+
+static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
+{
+	return irq_to_desc(irq);
+}
+
 #else
 
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
+
 extern struct irq_desc *sparse_irqs;
 #define for_each_irq_desc(irqX, desc)		\
 	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fb6bdb602a93..c19896f895f9 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -262,20 +262,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 
 #endif
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	if (irq < nr_irqs)
-		return &irq_desc[irq];
-
-	return NULL;
-}
-struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	return irq_to_desc(irq);
-}
-#endif
-
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
-- 
cgit v1.2.3-55-g7522


From 2cc21ef843d4fb7da122239b644a1f6f0aca60a6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 14:16:55 +0200
Subject: genirq: remove sparse irq code

This code is not ready, but we need to rip it out instead of rebasing
as we would lose the APIC/IO_APIC unification otherwise.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic.c    | 130 ++-----------------------------------------
 arch/x86/kernel/irq_32.c     |   8 ---
 arch/x86/kernel/irq_64.c     |   8 ---
 drivers/char/random.c        |  31 -----------
 drivers/pci/htirq.c          |  19 +------
 drivers/pci/intr_remapping.c |  75 -------------------------
 fs/proc/proc_misc.c          |  43 ++------------
 include/linux/irq.h          |  20 -------
 kernel/irq/handle.c          | 114 -------------------------------------
 9 files changed, 10 insertions(+), 438 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f959acbc0db2..683610517d2a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -111,9 +111,6 @@ struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_cfg *next;
-#endif
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -151,15 +148,6 @@ static void init_one_irq_cfg(struct irq_cfg *cfg)
 
 static struct irq_cfg *irq_cfgx;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/*
- * Protect the irq_cfgx_free freelist:
- */
-static DEFINE_SPINLOCK(irq_cfg_lock);
-
-static struct irq_cfg *irq_cfgx_free;
-#endif
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -174,114 +162,7 @@ static void __init init_work(void *data)
 	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
 	for (i = legacy_count; i < *da->nr; i++)
 		init_one_irq_cfg(&cfg[i]);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	for (i = 1; i < *da->nr; i++)
-		cfg[i-1].next = &cfg[i];
-
-	irq_cfgx_free = &irq_cfgx[legacy_count];
-	irq_cfgx[legacy_count - 1].next = NULL;
-#endif
-}
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
-{
-	if (arg) {
-		nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-		if (nr_irq_cfg < 32)
-			nr_irq_cfg = 32;
-	}
-	return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-#define for_each_irq_cfg(irqX, cfg)           \
-        for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U)
-
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	struct irq_cfg *cfg;
-
-	cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg = cfg->next;
-	}
-
-	return NULL;
-}
-
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	struct irq_cfg *cfg, *cfg_pri;
-	unsigned long flags;
-	int count = 0;
-	int i;
-
-	cfg_pri = cfg = irq_cfgx;
-	while (cfg) {
-		if (cfg->irq == irq)
-			return cfg;
-
-		cfg_pri = cfg;
-		cfg = cfg->next;
-		count++;
-	}
-
-	spin_lock_irqsave(&irq_cfg_lock, flags);
-	if (!irq_cfgx_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-		total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-		if (after_bootmem)
-			cfg = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!cfg)
-			panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-		phys = __pa(cfg);
-		printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_cfg; i++)
-			init_one_irq_cfg(&cfg[i]);
-
-		for (i = 1; i < nr_irq_cfg; i++)
-			cfg[i-1].next = &cfg[i];
-
-		irq_cfgx_free = cfg;
-	}
-
-	cfg = irq_cfgx_free;
-	irq_cfgx_free = irq_cfgx_free->next;
-	cfg->next = NULL;
-	if (cfg_pri)
-		cfg_pri->next = cfg;
-	else
-		irq_cfgx = cfg;
-	cfg->irq = irq;
-
-	spin_unlock_irqrestore(&irq_cfg_lock, flags);
-
-	return cfg;
 }
-#else
 
 #define for_each_irq_cfg(irq, cfg)		\
 	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
@@ -290,17 +171,16 @@ DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work
 
 struct irq_cfg *irq_cfg(unsigned int irq)
 {
-        if (irq < nr_irqs)
-                return &irq_cfgx[irq];
+	if (irq < nr_irqs)
+		return &irq_cfgx[irq];
 
-        return NULL;
+	return NULL;
 }
 struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
-        return irq_cfg(irq);
+	return irq_cfg(irq);
 }
 
-#endif
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -3068,9 +2948,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 	irq_want = nr_irqs - 1;
-#endif
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 001772ffc918..ccf6c1bf7120 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -272,20 +272,12 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irq_desc *desc = NULL;
 	int tail = 0;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	desc = (struct irq_desc *)v;
-	entries = -1U;
-	i = desc->irq;
-	if (!desc->next)
-		tail = 1;
-#else
 	entries = nr_irqs - 1;
 	i = *(loff_t *) v;
 	if (i == nr_irqs)
 		tail = 1;
 	else
 		desc = irq_to_desc(i);
-#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ec2661091283..21f53b911113 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -77,20 +77,12 @@ int show_interrupts(struct seq_file *p, void *v)
 	struct irq_desc *desc = NULL;
 	int tail = 0;
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	desc = (struct irq_desc *)v;
-	entries = -1U;
-	i = desc->irq;
-	if (!desc->next)
-		tail = 1;
-#else
 	entries = nr_irqs - 1;
 	i = *(loff_t *) v;
 	if (i == nr_irqs)
 		tail = 1;
 	else
 		desc = irq_to_desc(i);
-#endif
 
 	if (i == 0) {
 		seq_printf(p, "           ");
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 60c9c7ee6b2c..9ce80213007b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,8 +558,6 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct timer_rand_state **irq_timer_state;
 DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
@@ -583,33 +581,6 @@ static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *stat
 	irq_timer_state[irq] = state;
 }
 
-#else
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return NULL;
-
-	return desc->timer_rand_state;
-}
-
-static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return;
-
-	desc->timer_rand_state = state;
-}
-#endif
-
 static struct timer_rand_state input_timer_state;
 
 /*
@@ -967,10 +938,8 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 	if (irq >= nr_irqs)
 		return;
-#endif
 
 	state = get_timer_rand_state(irq);
 
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 9e4929a00832..bf7d6ce9bbb3 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -82,18 +82,6 @@ void unmask_ht_irq(unsigned int irq)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
 /**
  * __ht_create_irq - create an irq and attach it to a device.
  * @dev: The hypertransport device to find the irq capability on.
@@ -110,7 +98,6 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	int max_irq;
 	int pos;
 	int irq;
-	unsigned int irq_want;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -138,12 +125,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
-	irq_want= build_irq_for_pci_dev(dev);
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	irq = create_irq_nr(irq_want + idx);
-#else
 	irq = create_irq();
-#endif
+
 	if (irq <= 0) {
 		kfree(cfg);
 		return -EBUSY;
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 2dcf973890c4..0f43b265eee6 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,78 +19,6 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static struct irq_2_iommu *irq_2_iommuX;
-/* fill one page ? */
-static int nr_irq_2_iommu = 0x100;
-static int irq_2_iommu_index;
-DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irq_2_iommu, PAGE_SIZE, NULL);
-
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-				     unsigned long align,
-				     unsigned long goal);
-
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int not_used)
-{
-	struct irq_2_iommu *iommu;
-	unsigned long total_bytes;
-
-	if (irq_2_iommu_index >= nr_irq_2_iommu) {
-		/*
-		 *  we run out of pre-allocate ones, allocate more
-		 */
-		printk(KERN_DEBUG "try to get more irq_2_iommu %d\n", nr_irq_2_iommu);
-
-		total_bytes = sizeof(struct irq_2_iommu)*nr_irq_2_iommu;
-
-		if (after_bootmem)
-			iommu = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			iommu = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!iommu)
-			panic("can not get more irq_2_iommu\n");
-
-		irq_2_iommuX = iommu;
-		irq_2_iommu_index = 0;
-	}
-
-	iommu = &irq_2_iommuX[irq_2_iommu_index];
-	irq_2_iommu_index++;
-	return iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	BUG_ON(!desc);
-
-	return desc->irq_2_iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-{
-	struct irq_desc *desc;
-	struct irq_2_iommu *irq_iommu;
-
-	/*
-	 * alloc irq desc if not allocated already.
-	 */
-	desc = irq_to_desc_alloc(irq);
-
-	irq_iommu = desc->irq_2_iommu;
-
-	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(irq);
-
-	return desc->irq_2_iommu;
-}
-
-#else /* !CONFIG_HAVE_SPARSE_IRQ */
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 static struct irq_2_iommu *irq_2_iommuX;
 DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
@@ -109,7 +37,6 @@ static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
 }
-#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
@@ -166,11 +93,9 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
-#ifndef	CONFIG_HAVE_SPARSE_IRQ
 	/* protect irq_2_iommu_alloc later */
 	if (irq >= nr_irqs)
 		return -1;
-#endif
 
 	/*
 	 * start the IRTE search from index 0.
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index d68c3592fe4a..3f5c7b9d1a70 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -529,13 +529,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+
 		for_each_irq_desc(j, desc)
-		{
-			unsigned int temp;
+			sum += kstat_irqs_cpu(j, i);
 
-			temp = kstat_irqs_cpu(j, i);
-			sum += temp;
-		}
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -578,21 +575,13 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_desc(j, desc)
-	{
+	for_each_irq_desc(j, desc) {
 		per_irq_sum = 0;
-		for_each_possible_cpu(i) {
-			unsigned int temp;
 
-			temp = kstat_irqs_cpu(j, i);
-			per_irq_sum += temp;
-		}
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-		seq_printf(p, " %#x:%u", j, per_irq_sum);
-#else
 		seq_printf(p, " %u", per_irq_sum);
-#endif
 	}
 
 	seq_printf(p,
@@ -645,36 +634,14 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc *desc;
-	int irq;
-	int count = *pos;
-
-	for_each_irq_desc(irq, desc) {
-		if (count-- == 0)
-			return desc;
-	}
-
-	return NULL;
-#else
 	return (*pos <= nr_irqs) ? pos : NULL;
-#endif
 }
 
 
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc *desc;
-
-	desc = ((struct irq_desc *)v)->next;
-	(*pos)++;
-
-	return desc;
-#else
 	(*pos)++;
 	return (*pos <= nr_irqs) ? pos : NULL;
-#endif
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7d1adacaadb4..68e0f3f9df30 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -167,15 +167,8 @@ struct irq_2_iommu;
  */
 struct irq_desc {
 	unsigned int		irq;
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	struct irq_desc		*next;
-	struct timer_rand_state *timer_rand_state;
-#endif
 #ifdef CONFIG_HAVE_DYN_ARRAY
 	unsigned int            *kstat_irqs;
-#endif
-#if defined(CONFIG_INTR_REMAP) && defined(CONFIG_HAVE_SPARSE_IRQ)
-       struct irq_2_iommu      *irq_2_iommu;
 #endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
@@ -205,8 +198,6 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-
 #ifndef CONFIG_HAVE_DYN_ARRAY
 /* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
@@ -224,17 +215,6 @@ static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
 	return irq_to_desc(irq);
 }
 
-#else
-
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc(unsigned int irq);
-
-extern struct irq_desc *sparse_irqs;
-#define for_each_irq_desc(irqX, desc)		\
-	for (desc = sparse_irqs, irqX = desc->irq; desc; desc = desc->next, irqX = desc ? desc->irq : -1U)
-
-#endif
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c19896f895f9..f837133cdfbe 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,15 +111,6 @@ static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
 	}
 }
 
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-/*
- * Protect the sparse_irqs_free freelist:
- */
-static DEFINE_SPINLOCK(sparse_irq_lock);
-static struct irq_desc *sparse_irqs_free;
-struct irq_desc *sparse_irqs;
-#endif
-
 static void __init init_work(void *data)
 {
 	struct dyn_array *da = data;
@@ -130,121 +121,16 @@ static void __init init_work(void *data)
 
 	for (i = 0; i < *da->nr; i++) {
 		init_one_irq_desc(&desc[i]);
-#ifndef CONFIG_HAVE_SPARSE_IRQ
 		desc[i].irq = i;
-#endif
 	}
 
 	/* init kstat_irqs, nr_cpu_ids is ready already */
 	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-	for (i = 1; i < *da->nr; i++)
-		desc[i-1].next = &desc[i];
-
-	sparse_irqs_free = sparse_irqs;
-	sparse_irqs = NULL;
-#endif
-}
-
-#ifdef CONFIG_HAVE_SPARSE_IRQ
-static int nr_irq_desc = 32;
-
-static int __init parse_nr_irq_desc(char *arg)
-{
-	if (arg)
-		nr_irq_desc = simple_strtoul(arg, NULL, 0);
-	return 0;
-}
-
-early_param("nr_irq_desc", parse_nr_irq_desc);
-
-DEFINE_DYN_ARRAY(sparse_irqs, sizeof(struct irq_desc), nr_irq_desc, PAGE_SIZE, init_work);
-
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = sparse_irqs;
-	while (desc) {
-		if (desc->irq == irq)
-			return desc;
-
-		desc = desc->next;
-	}
-	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	struct irq_desc *desc, *desc_pri;
-	unsigned long flags;
-	int count = 0;
-	int i;
-
-	desc_pri = desc = sparse_irqs;
-	while (desc) {
-		if (desc->irq == irq)
-			return desc;
-
-		desc_pri = desc;
-		desc = desc->next;
-		count++;
-	}
-
-	spin_lock_irqsave(&sparse_irq_lock, flags);
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	if (!sparse_irqs_free) {
-		unsigned long phys;
-		unsigned long total_bytes;
-
-		printk(KERN_DEBUG "try to get more irq_desc %d\n", nr_irq_desc);
-
-		total_bytes = sizeof(struct irq_desc) * nr_irq_desc;
-		if (after_bootmem)
-			desc = kzalloc(total_bytes, GFP_ATOMIC);
-		else
-			desc = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-		if (!desc)
-			panic("please boot with nr_irq_desc= %d\n", count * 2);
-
-		phys = __pa(desc);
-		printk(KERN_DEBUG "irq_desc ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-		for (i = 0; i < nr_irq_desc; i++)
-			init_one_irq_desc(&desc[i]);
-
-		for (i = 1; i < nr_irq_desc; i++)
-			desc[i-1].next = &desc[i];
-
-		/* init kstat_irqs, nr_cpu_ids is ready already */
-		init_kstat_irqs(desc, nr_irq_desc, nr_cpu_ids);
-
-		sparse_irqs_free = desc;
-	}
-
-	desc = sparse_irqs_free;
-	sparse_irqs_free = sparse_irqs_free->next;
-	desc->next = NULL;
-	if (desc_pri)
-		desc_pri->next = desc;
-	else
-		sparse_irqs = desc;
-	desc->irq = irq;
-
-	spin_unlock_irqrestore(&sparse_irq_lock, flags);
-
-	return desc;
-}
-#else
 struct irq_desc *irq_desc;
 DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
 
-#endif
-
 #else
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
-- 
cgit v1.2.3-55-g7522


From ee32c9732244bde4b9b59eeac2814c23e2b71f8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 14:34:09 +0200
Subject: genirq: remove irq_to_desc_alloc

Remove the leftover of sparseirqs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/io_apic.c    | 6 +-----
 arch/x86/kernel/irqinit_32.c | 2 +-
 arch/x86/kernel/irqinit_64.c | 2 +-
 include/linux/irq.h          | 5 -----
 kernel/irq/chip.c            | 2 +-
 5 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 683610517d2a..e03bc0f87eef 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1257,11 +1257,7 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
 {
 	struct irq_desc *desc;
 
-	/* first time to use this irq_desc */
-	if (irq < 16)
-		desc = irq_to_desc(irq);
-	else
-		desc = irq_to_desc_alloc(irq);
+	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9092103a18eb..a8d35998d308 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -70,7 +70,7 @@ void __init init_ISA_irqs (void)
 	 */
 	for (i = 0; i < 16; i++) {
 		/* first time call this irq_desc */
-		struct irq_desc *desc = irq_to_desc_alloc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index d17fbc26d96f..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -144,7 +144,7 @@ void __init init_ISA_irqs(void)
 
 	for (i = 0; i < 16; i++) {
 		/* first time call this irq_desc */
-		struct irq_desc *desc = irq_to_desc_alloc(i);
+		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
 		desc->action = NULL;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 68e0f3f9df30..3f33c7790300 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -210,11 +210,6 @@ static inline struct irq_desc *irq_to_desc(unsigned int irq)
 	return (irq < nr_irqs) ? irq_desc + irq : NULL;
 }
 
-static inline struct irq_desc *irq_to_desc_alloc(unsigned int irq)
-{
-	return irq_to_desc(irq);
-}
-
 #ifdef CONFIG_HAVE_DYN_ARRAY
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 570d1ea1db5d..e6f73dbfcc3d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -28,7 +28,7 @@ void dynamic_irq_init(unsigned int irq)
 	unsigned long flags;
 
 	/* first time to use this irq_desc */
-	desc = irq_to_desc_alloc(irq);
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
-- 
cgit v1.2.3-55-g7522


From d6c88a507ef0b6afdb013cba4e7804ba7324d99a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 15 Oct 2008 15:27:23 +0200
Subject: genirq: revert dynarray

Revert the dynarray changes. They need more thought and polishing.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/Kconfig                      |   4 -
 arch/x86/Kconfig                  |   1 -
 arch/x86/kernel/io_apic.c         | 199 ++++++++++++++------------------------
 arch/x86/kernel/setup_percpu.c    |   8 +-
 arch/x86/kernel/visws_quirks.c    |   2 +-
 arch/x86/kernel/vmlinux_32.lds.S  |   1 -
 arch/x86/kernel/vmlinux_64.lds.S  |   2 -
 arch/x86/xen/spinlock.c           |   2 +-
 drivers/char/random.c             |   5 -
 drivers/pci/intr_remapping.c      |  11 +--
 include/asm-generic/vmlinux.lds.h |  13 ---
 include/linux/init.h              |  43 --------
 include/linux/irq.h               |  15 ---
 include/linux/kernel_stat.h       |  16 ++-
 init/Makefile                     |   2 +-
 init/dyn_array.c                  | 120 -----------------------
 init/main.c                       |  11 +--
 kernel/irq/chip.c                 |  30 +-----
 kernel/irq/handle.c               | 114 ++--------------------
 19 files changed, 103 insertions(+), 496 deletions(-)
 delete mode 100644 init/dyn_array.c

diff --git a/arch/Kconfig b/arch/Kconfig
index c8a7c2eb6490..071004d3a1b1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,7 +102,3 @@ config HAVE_CLK
 	help
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
-
-config HAVE_DYN_ARRAY
-	def_bool n
-
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8636ddf2f4a4..8da6123a60d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,7 +33,6 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
-	select HAVE_DYN_ARRAY
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index e03bc0f87eef..6f80dc2f137e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -107,7 +107,6 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-struct irq_cfg;
 struct irq_pin_list;
 struct irq_cfg {
 	unsigned int irq;
@@ -120,7 +119,7 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
 	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
 	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@ -139,48 +138,26 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = {
 	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-	memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	struct irq_cfg *cfg;
-	int legacy_count;
-	int i;
-
-	cfg = *da->name;
-
-	memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-	legacy_count = ARRAY_SIZE(irq_cfg_legacy);
-	for (i = legacy_count; i < *da->nr; i++)
-		init_one_irq_cfg(&cfg[i]);
-}
-
 #define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq])
+	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
 
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work);
-
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	if (irq < nr_irqs)
-		return &irq_cfgx[irq];
-
-	return NULL;
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
-struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 {
 	return irq_cfg(irq);
 }
 
+/*
+ * Rough estimation of how many shared IRQs there are, can be changed
+ * anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -193,59 +170,29 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
 static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
+
+static void __init irq_2_pin_init(void)
 {
-	struct dyn_array *da = data;
-	struct irq_pin_list *pin;
+	struct irq_pin_list *pin = irq_2_pin_head;
 	int i;
 
-	pin = *da->name;
-
-	for (i = 1; i < *da->nr; i++)
+	for (i = 1; i < PIN_MAP_SIZE; i++)
 		pin[i-1].next = &pin[i];
 
 	irq_2_pin_ptr = &pin[0];
 }
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
 
 static struct irq_pin_list *get_one_free_irq_2_pin(void)
 {
-	struct irq_pin_list *pin;
-	int i;
-
-	pin = irq_2_pin_ptr;
-
-	if (pin) {
-		irq_2_pin_ptr = pin->next;
-		pin->next = NULL;
-		return pin;
-	}
-
-	/*
-	 *  we run out of pre-allocate ones, allocate more
-	 */
-	printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-	if (after_bootmem)
-		pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-				 GFP_ATOMIC);
-	else
-		pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-				nr_irq_2_pin, PAGE_SIZE, 0);
+	struct irq_pin_list *pin = irq_2_pin_ptr;
 
 	if (!pin)
 		panic("can not get more irq_2_pin\n");
 
-	for (i = 1; i < nr_irq_2_pin; i++)
-		pin[i-1].next = &pin[i];
-
 	irq_2_pin_ptr = pin->next;
 	pin->next = NULL;
-
 	return pin;
 }
 
@@ -284,8 +231,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
+
+	if (sis_apic_bug)
+		writel(reg, &io_apic->index);
 	writel(value, &io_apic->data);
 }
 
@@ -1044,11 +992,11 @@ static int pin_2_irq(int idx, int apic, int pin)
 		while (i < apic)
 			irq += nr_ioapic_registers[i++];
 		irq += pin;
-                /*
+		/*
                  * For MPS mode, so far only needed by ES7000 platform
                  */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
+		if (ioapic_renumber_irq)
+			irq = ioapic_renumber_irq(apic, irq);
 	}
 
 #ifdef CONFIG_X86_32
@@ -1232,19 +1180,19 @@ static struct irq_chip ir_ioapic_chip;
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
 {
-        int apic, idx, pin;
+	int apic, idx, pin;
 
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic, pin, mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
          * nonexistent IRQs are edge default
          */
-        return 0;
+	return 0;
 }
 #else
 static inline int IO_APIC_irq_trigger(int irq)
@@ -1509,8 +1457,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 	reg_01.raw = io_apic_read(apic, 1);
 	if (reg_01.bits.version >= 0x10)
 		reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
@@ -2089,9 +2037,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
 #else
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-        send_IPI_self(irq_cfg(irq)->vector);
+	send_IPI_self(irq_cfg(irq)->vector);
 
-        return 1;
+	return 1;
 }
 #endif
 
@@ -2189,7 +2137,7 @@ static int migrate_irq_remapped_level(int irq)
 
 	if (io_apic_level_ack_pending(irq)) {
 		/*
-	 	 * Interrupt in progress. Migrating irq now will change the
+		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
 		 * the EOI broadcast performed by cpu.
 		 * So, delay the irq migration to the next instance.
@@ -2426,28 +2374,28 @@ static void ack_apic_level(unsigned int irq)
 }
 
 static struct irq_chip ioapic_chip __read_mostly = {
-	.name 		= "IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_apic_edge,
-	.eoi 		= ack_apic_level,
+	.name		= "IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_apic_edge,
+	.eoi		= ack_apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ioapic_affinity_irq,
+	.set_affinity	= set_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
 
 #ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name 		= "IR-IO-APIC",
-	.startup 	= startup_ioapic_irq,
-	.mask	 	= mask_IO_APIC_irq,
-	.unmask	 	= unmask_IO_APIC_irq,
-	.ack 		= ack_x2apic_edge,
-	.eoi 		= ack_x2apic_level,
+	.name		= "IR-IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_x2apic_edge,
+	.eoi		= ack_x2apic_level,
 #ifdef CONFIG_SMP
-	.set_affinity 	= set_ir_ioapic_affinity_irq,
+	.set_affinity	= set_ir_ioapic_affinity_irq,
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
@@ -2636,8 +2584,8 @@ static inline void __init check_timer(void)
 
 	local_irq_save(flags);
 
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
+	ver = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(ver);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -2822,12 +2770,12 @@ void __init setup_IO_APIC(void)
 	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
+	/*
          * Set up IO-APIC IRQ routing.
          */
 #ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
+	if (!acpi_ioapic)
+		setup_ioapic_ids_from_mpc();
 #endif
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
@@ -2842,9 +2790,9 @@ void __init setup_IO_APIC(void)
 
 static int __init io_apic_bug_finalize(void)
 {
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
+	if (sis_apic_bug == -1)
+		sis_apic_bug = 0;
+	return 0;
 }
 
 late_initcall(io_apic_bug_finalize);
@@ -3199,7 +3147,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 	if (index < 0) {
 		printk(KERN_ERR
 		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-		        pci_name(dev));
+		       pci_name(dev));
 		return -ENOSPC;
 	}
 	return index;
@@ -3885,23 +3833,24 @@ static struct resource * __init ioapic_setup_resources(void)
 void __init ioapic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-	int i;
 	struct resource *ioapic_res;
+	int i;
 
+	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].mp_apicaddr;
 #ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
+			if (!ioapic_phys) {
+				printk(KERN_ERR
+				       "WARNING: bogus zero IO-APIC "
+				       "address found in MPTABLE, "
+				       "disabling IO/APIC support!\n");
+				smp_found_config = 0;
+				skip_ioapic_setup = 1;
+				goto fake_ioapic_page;
+			}
 #endif
 		} else {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 2b7dab699e83..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size, old_size, da_size;
+	ssize_t size, old_size;
 	char *ptr;
 	int cpu;
 	unsigned long align = 1;
@@ -150,9 +150,8 @@ void __init setup_per_cpu_areas(void)
 
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
-	da_size = per_cpu_dyn_array_size(&align);
 	align = max_t(unsigned long, PAGE_SIZE, align);
-	size = roundup(old_size + da_size, align);
+	size = roundup(old_size, align);
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -182,9 +181,6 @@ void __init setup_per_cpu_areas(void)
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
-		per_cpu_alloc_dyn_array(cpu, ptr + old_size);
-
 	}
 
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 817aa55a1209..0c9667f0752a 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_irqs_this_cpu(desc)++;
+	kstat_incr_irqs_this_cpu(realirq, desc);
 
 	if (likely(desc->action != NULL))
 		handle_IRQ_event(realirq, desc->action);
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index c36007ab3940..a9b8560adbc2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -145,7 +145,6 @@ SECTIONS
 	*(.x86_cpu_dev.init)
 	__x86_cpu_dev_end = .;
   }
-  DYN_ARRAY_INIT(8)
   SECURITY_INIT
   . = ALIGN(4);
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 30973dbac8c2..3245ad72594a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -174,8 +174,6 @@ SECTIONS
   }
   __x86_cpu_dev_end = .;
 
-  DYN_ARRAY_INIT(8)
-
   SECURITY_INIT
 
   . = ALIGN(8);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index bb6bc721b13d..5601506f2dd9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	kstat_irqs_this_cpu(irq_to_desc(irq))++;
+	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 
 out:
 	raw_local_irq_restore(flags);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9ce80213007b..1137d2976043 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,12 +558,7 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct timer_rand_state **irq_timer_state;
-DEFINE_DYN_ARRAY(irq_timer_state, sizeof(struct timer_rand_state *), nr_irqs, PAGE_SIZE, NULL);
-#else
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
-#endif
 
 static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
 {
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 0f43b265eee6..950769e87475 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,20 +19,13 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_2_iommu *irq_2_iommuX;
-DEFINE_DYN_ARRAY(irq_2_iommuX, sizeof(struct irq_2_iommu), nr_irqs, PAGE_SIZE, NULL);
-#else
 static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
-#endif
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	if (irq < nr_irqs)
-		return &irq_2_iommuX[irq];
-
-	return NULL;
+	return (irq < nr_irqs) ?: irq_2_iommuX + irq : NULL;
 }
+
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c68eda9d9a90..7440a0dceddb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -210,19 +210,6 @@
  * All archs are supposed to use RO_DATA() */
 #define RODATA RO_DATA(4096)
 
-#define DYN_ARRAY_INIT(align)							\
-	. = ALIGN((align));						\
-	.dyn_array.init : AT(ADDR(.dyn_array.init) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__dyn_array_start) = .;			\
-		*(.dyn_array.init)					\
-		VMLINUX_SYMBOL(__dyn_array_end) = .;			\
-	}								\
-	. = ALIGN((align));						\
-	.per_cpu_dyn_array.init : AT(ADDR(.per_cpu_dyn_array.init) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__per_cpu_dyn_array_start) = .;		\
-		*(.per_cpu_dyn_array.init)				\
-		VMLINUX_SYMBOL(__per_cpu_dyn_array_end) = .;		\
-	}
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
diff --git a/include/linux/init.h b/include/linux/init.h
index 59fbb4aaba6a..70ad53e1eab8 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -247,49 +247,6 @@ struct obs_kernel_param {
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
 
-struct dyn_array {
-	void **name;
-	unsigned long size;
-	unsigned int *nr;
-	unsigned long align;
-	void (*init_work)(void *);
-};
-extern struct dyn_array *__dyn_array_start[], *__dyn_array_end[];
-extern struct dyn_array *__per_cpu_dyn_array_start[], *__per_cpu_dyn_array_end[];
-
-#define DEFINE_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
-		static struct dyn_array __dyn_array_##nameX __initdata = \
-		{	.name = (void **)&(nameX),\
-			.size = sizeX,\
-			.nr   = &(nrX),\
-			.align = alignX,\
-			.init_work = init_workX,\
-		}; \
-		static struct dyn_array *__dyn_array_ptr_##nameX __used \
-		__attribute__((__section__(".dyn_array.init"))) = \
-			&__dyn_array_##nameX
-
-#define DEFINE_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
-	DEFINE_DYN_ARRAY_ADDR(nameX, nameX, sizeX, nrX, alignX, init_workX)
-
-#define DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, addrX, sizeX, nrX, alignX, init_workX) \
-		static struct dyn_array __per_cpu_dyn_array_##nameX __initdata = \
-		{	.name = (void **)&(addrX),\
-			.size = sizeX,\
-			.nr   = &(nrX),\
-			.align = alignX,\
-			.init_work = init_workX,\
-		}; \
-		static struct dyn_array *__per_cpu_dyn_array_ptr_##nameX __used \
-		__attribute__((__section__(".per_cpu_dyn_array.init"))) = \
-			&__per_cpu_dyn_array_##nameX
-
-#define DEFINE_PER_CPU_DYN_ARRAY(nameX, sizeX, nrX, alignX, init_workX) \
-	DEFINE_PER_CPU_DYN_ARRAY_ADDR(nameX, nameX, nrX, alignX, init_workX)
-
-extern void pre_alloc_dyn_array(void);
-extern unsigned long per_cpu_dyn_array_size(unsigned long *align);
-extern void per_cpu_alloc_dyn_array(int cpu, char *ptr);
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3f33c7790300..38bf89f2ade0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -139,8 +139,6 @@ struct irq_chip {
 	const char	*typename;
 };
 
-struct timer_rand_state;
-struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  *
@@ -167,9 +165,6 @@ struct irq_2_iommu;
  */
 struct irq_desc {
 	unsigned int		irq;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned int            *kstat_irqs;
-#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -198,23 +193,13 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
-/* could be removed if we get rid of all irq_desc reference */
 extern struct irq_desc irq_desc[NR_IRQS];
-#else
-extern struct irq_desc *irq_desc;
-#endif
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return (irq < nr_irqs) ? irq_desc + irq : NULL;
 }
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-#define kstat_irqs_this_cpu(DESC) \
-	((DESC)->kstat_irqs[smp_processor_id()])
-#endif
-
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 21249d8c1293..a9d0d360b776 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,9 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-#ifndef CONFIG_HAVE_DYN_ARRAY
        unsigned int irqs[NR_IRQS];
-#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -41,20 +39,18 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
-#define kstat_irqs_this_cpu(irq) \
-	(kstat_this_cpu.irqs[irq])
-#endif
+struct irq_desc;
 
+static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
+					    struct irq_desc *desc)
+{
+	kstat_this_cpu.irqs[irq]++;
+}
 
-#ifndef CONFIG_HAVE_DYN_ARRAY
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
 }
-#else
-extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
-#endif
 
 /*
  * Number of interrupts per specific IRQ source, since bootup
diff --git a/init/Makefile b/init/Makefile
index dc5eeca6eb6d..4a243df426f7 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y                          := main.o dyn_array.o version.o mounts.o
+obj-y                          := main.o version.o mounts.o
 ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
 else
diff --git a/init/dyn_array.c b/init/dyn_array.c
deleted file mode 100644
index c8d5e2a18588..000000000000
--- a/init/dyn_array.c
+++ /dev/null
@@ -1,120 +0,0 @@
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-void __init pre_alloc_dyn_array(void)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long total_size = 0, size, phys;
-	unsigned long max_align = 1;
-	struct dyn_array **daa;
-	char *ptr;
-
-	/* get the total size at first */
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		printk(KERN_DEBUG "dyn_array %pF size:%#lx nr:%d align:%#lx\n",
-			da->name, da->size, *da->nr, da->align);
-		size = da->size * (*da->nr);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size)
-		printk(KERN_DEBUG "dyn_array total_size: %#lx\n",
-			 total_size);
-	else
-		return;
-
-	/* allocate them all together */
-	max_align = max_t(unsigned long, max_align, PAGE_SIZE);
-	ptr = __alloc_bootmem(total_size, max_align, 0);
-	phys = virt_to_phys(ptr);
-
-	for (daa = __dyn_array_start ; daa < __dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		phys = roundup(phys, da->align);
-		printk(KERN_DEBUG "dyn_array %pF ==> [%#lx - %#lx]\n",
-			da->name, phys, phys + size);
-		*da->name = phys_to_virt(phys);
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#else
-#ifdef CONFIG_GENERIC_HARDIRQS
-	unsigned int i;
-
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].irq = i;
-#endif
-#endif
-}
-
-unsigned long __init per_cpu_dyn_array_size(unsigned long *align)
-{
-	unsigned long total_size = 0;
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size;
-	struct dyn_array **daa;
-	unsigned max_align = 1;
-
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		printk(KERN_DEBUG "per_cpu_dyn_array %pF size:%#lx nr:%d align:%#lx\n",
-			da->name, da->size, *da->nr, da->align);
-		size = da->size * (*da->nr);
-		total_size += roundup(size, da->align);
-		if (da->align > max_align)
-			max_align = da->align;
-	}
-	if (total_size) {
-		printk(KERN_DEBUG "per_cpu_dyn_array total_size: %#lx\n",
-			 total_size);
-		*align = max_align;
-	}
-#endif
-	return total_size;
-}
-
-#ifdef CONFIG_SMP
-void __init per_cpu_alloc_dyn_array(int cpu, char *ptr)
-{
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	unsigned long size, phys;
-	struct dyn_array **daa;
-	unsigned long addr;
-	void **array;
-
-	phys = virt_to_phys(ptr);
-	for (daa = __per_cpu_dyn_array_start ; daa < __per_cpu_dyn_array_end; daa++) {
-		struct dyn_array *da = *daa;
-
-		size = da->size * (*da->nr);
-		phys = roundup(phys, da->align);
-		printk(KERN_DEBUG "per_cpu_dyn_array %pF ==> [%#lx - %#lx]\n",
-			da->name, phys, phys + size);
-
-		addr = (unsigned long)da->name;
-		addr += per_cpu_offset(cpu);
-		array = (void **)addr;
-		*array = phys_to_virt(phys);
-		*da->name = *array; /* so init_work could use it directly */
-
-		phys += size;
-
-		if (da->init_work)
-			da->init_work(da);
-	}
-#endif
-}
-#endif
diff --git a/init/main.c b/init/main.c
index e81cf427d9c7..27f6bf6108e9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -391,23 +391,17 @@ EXPORT_SYMBOL(__per_cpu_offset);
 
 static void __init setup_per_cpu_areas(void)
 {
-	unsigned long size, i, old_size;
+	unsigned long size, i;
 	char *ptr;
 	unsigned long nr_possible_cpus = num_possible_cpus();
-	unsigned long align = 1;
-	unsigned da_size;
 
 	/* Copy section for each CPU (we discard the original) */
-	old_size = PERCPU_ENOUGH_ROOM;
-	da_size = per_cpu_dyn_array_size(&align);
-	align = max_t(unsigned long, PAGE_SIZE, align);
-	size = ALIGN(old_size + da_size, align);
+	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
 	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
 
 	for_each_possible_cpu(i) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-		per_cpu_alloc_dyn_array(i, ptr + old_size);
 		ptr += size;
 	}
 }
@@ -573,7 +567,6 @@ asmlinkage void __init start_kernel(void)
 	printk(KERN_NOTICE);
 	printk(linux_banner);
 	setup_arch(&command_line);
-	pre_alloc_dyn_array();
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
 	unwind_setup();
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index e6f73dbfcc3d..d96d6f687c48 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -326,11 +326,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -371,11 +367,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -422,11 +414,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -490,11 +478,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -549,11 +533,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f837133cdfbe..9fe86b3a60a5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,11 +18,6 @@
 
 #include "internals.h"
 
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
-
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -30,15 +25,10 @@ static struct lock_class_key irq_desc_lock_class;
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void
-handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -59,80 +49,6 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-static struct irq_desc irq_desc_init = {
-	.irq = -1U,
-	.status = IRQ_DISABLED,
-	.chip = &no_irq_chip,
-	.handle_irq = handle_bad_irq,
-	.depth = 1,
-	.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
-#ifdef CONFIG_SMP
-	.affinity = CPU_MASK_ALL
-#endif
-};
-
-
-static void init_one_irq_desc(struct irq_desc *desc)
-{
-	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
-	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-}
-
-extern int after_bootmem;
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-			     unsigned long align,
-			     unsigned long goal);
-
-static void init_kstat_irqs(struct irq_desc *desc, int nr_desc, int nr)
-{
-	unsigned long bytes, total_bytes;
-	char *ptr;
-	int i;
-	unsigned long phys;
-
-	/* Compute how many bytes we need per irq and allocate them */
-	bytes = nr * sizeof(unsigned int);
-	total_bytes = bytes * nr_desc;
-	if (after_bootmem)
-		ptr = kzalloc(total_bytes, GFP_ATOMIC);
-	else
-		ptr = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-	if (!ptr)
-		panic(" can not allocate kstat_irqs\n");
-
-	phys = __pa(ptr);
-	printk(KERN_DEBUG "kstat_irqs ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-	for (i = 0; i < nr_desc; i++) {
-		desc[i].kstat_irqs = (unsigned int *)ptr;
-		ptr += bytes;
-	}
-}
-
-static void __init init_work(void *data)
-{
-	struct dyn_array *da = data;
-	int i;
-	struct  irq_desc *desc;
-
-	desc = *da->name;
-
-	for (i = 0; i < *da->nr; i++) {
-		init_one_irq_desc(&desc[i]);
-		desc[i].irq = i;
-	}
-
-	/* init kstat_irqs, nr_cpu_ids is ready already */
-	init_kstat_irqs(desc, *da->nr, nr_cpu_ids);
-}
-
-struct irq_desc *irq_desc;
-DEFINE_DYN_ARRAY(irq_desc, sizeof(struct irq_desc), nr_irqs, PAGE_SIZE, init_work);
-
-#else
-
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -146,8 +62,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
-#endif
-
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -258,11 +172,8 @@ unsigned int __do_IRQ(unsigned int irq)
 	struct irqaction *action;
 	unsigned int status;
 
-#ifdef CONFIG_HAVE_DYN_ARRAY
-	kstat_irqs_this_cpu(desc)++;
-#else
-	kstat_irqs_this_cpu(irq)++;
-#endif
+	kstat_incr_irqs_this_cpu(irq, desc);
+
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -351,23 +262,16 @@ out:
 
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
 void early_init_irq_lock_class(void)
 {
-#ifndef CONFIG_HAVE_DYN_ARRAY
 	int i;
 
 	for (i = 0; i < nr_irqs; i++)
 		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
-#endif
 }
 #endif
-
-#ifdef CONFIG_HAVE_DYN_ARRAY
-unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	return desc->kstat_irqs[cpu];
-}
-#endif
-EXPORT_SYMBOL(kstat_irqs_cpu);
-
-- 
cgit v1.2.3-55-g7522


From a1aca5de08a0cb840a90fb3f729a5940f8d21185 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 15 Oct 2008 19:29:15 +0200
Subject: genirq: remove artifacts from sparseirq removal

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                     | 1 +
 arch/x86/kernel/acpi/boot.c      | 1 -
 arch/x86/kernel/irqinit_32.c     | 1 -
 arch/x86/kernel/vmlinux_64.lds.S | 1 -
 arch/x86/mm/init_32.c            | 3 ---
 include/linux/init.h             | 1 -
 include/linux/kernel_stat.h      | 2 +-
 7 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 071004d3a1b1..0267babe5eb9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,3 +102,4 @@ config HAVE_CLK
 	help
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
+
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5fef4fece4a5..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1254,7 +1254,6 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return count;
 	}
 
-
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
 				  nr_irqs);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index a8d35998d308..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -184,4 +184,3 @@ void __init native_init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 }
-
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 3245ad72594a..46e05447405b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -173,7 +173,6 @@ SECTIONS
 	*(.x86_cpu_dev.init)
   }
   __x86_cpu_dev_end = .;
-
   SECURITY_INIT
 
   . = ALIGN(8);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 91343c2694b4..8396868e82c5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -66,7 +66,6 @@ static unsigned long __meminitdata table_end;
 static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
-int after_bootmem;
 
 static __init void *alloc_low_page(unsigned long *phys)
 {
@@ -989,8 +988,6 @@ void __init mem_init(void)
 
 	set_highmem_pages_init();
 
-	after_bootmem = 1;
-
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/include/linux/init.h b/include/linux/init.h
index 70ad53e1eab8..93538b696e3d 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -246,7 +246,6 @@ struct obs_kernel_param {
 
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
-
 #endif /* __ASSEMBLY__ */
 
 /**
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a9d0d360b776..89b6ecd41473 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,7 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-       unsigned int irqs[NR_IRQS];
+	unsigned int irqs[NR_IRQS];
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
-- 
cgit v1.2.3-55-g7522


From d3c60047bdb03199b93497ac40bd531315d43a86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 09:55:00 +0200
Subject: genirq: cleanup the sparseirq modifications

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/chip.c     | 43 +++++++++++++------------------------------
 kernel/irq/handle.c   |  3 +--
 kernel/irq/manage.c   | 38 +++++++++++++++++++++++---------------
 kernel/irq/proc.c     |  1 +
 kernel/irq/spurious.c | 18 ++++++++++--------
 5 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d96d6f687c48..4895fde4eb93 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,11 +24,9 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	/* first time to use this irq_desc */
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
@@ -58,10 +56,9 @@ void dynamic_irq_init(unsigned int irq)
  */
 void dynamic_irq_cleanup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
@@ -90,10 +87,9 @@ void dynamic_irq_cleanup(unsigned int irq)
  */
 int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
@@ -118,11 +114,10 @@ EXPORT_SYMBOL(set_irq_chip);
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
@@ -147,10 +142,9 @@ EXPORT_SYMBOL(set_irq_type);
  */
 int set_irq_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
@@ -173,10 +167,9 @@ EXPORT_SYMBOL(set_irq_data);
  */
 int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
@@ -200,10 +193,9 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install chip data for IRQ%d\n", irq);
@@ -228,9 +220,8 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
 }
@@ -247,11 +238,9 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->enable(irq);
-
 	return 0;
 }
 
@@ -260,9 +249,8 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
 }
@@ -550,10 +538,9 @@ void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
@@ -614,13 +601,11 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 
 void __init set_irq_noprobe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
-
 		return;
 	}
 
@@ -631,13 +616,11 @@ void __init set_irq_noprobe(unsigned int irq)
 
 void __init set_irq_probe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
-
 		return;
 	}
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9fe86b3a60a5..a69368ff607f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -68,9 +68,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad2ce72c83c4..c498a1b8c621 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -140,10 +140,9 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -170,9 +169,8 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -213,10 +211,9 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -291,10 +288,9 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return 0;
 
@@ -355,16 +351,15 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
  */
-int setup_irq(unsigned int irq, struct irqaction *new)
+static int
+__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
 {
-	struct irq_desc *desc;
 	struct irqaction *old, **p;
 	const char *old_name = NULL;
 	unsigned long flags;
 	int shared = 0;
 	int ret;
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return -EINVAL;
 
@@ -503,6 +498,20 @@ mismatch:
 	return -EBUSY;
 }
 
+/**
+ *	setup_irq - setup an interrupt
+ *	@irq: Interrupt line to setup
+ *	@act: irqaction for the interrupt
+ *
+ * Used to statically setup interrupts in the early boot process.
+ */
+int setup_irq(unsigned int irq, struct irqaction *act)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return __setup_irq(irq, desc, act);
+}
+
 /**
  *	free_irq - free an interrupt
  *	@irq: Interrupt line to free
@@ -519,13 +528,12 @@ mismatch:
  */
 void free_irq(unsigned int irq, void *dev_id)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction **p;
 	unsigned long flags;
 
 	WARN_ON(in_interrupt());
 
-	desc = irq_to_desc(irq);
 	if (!desc)
 		return;
 
@@ -624,8 +632,8 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		unsigned long irqflags, const char *devname, void *dev_id)
 {
 	struct irqaction *action;
-	int retval;
 	struct irq_desc *desc;
+	int retval;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -662,7 +670,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
-	retval = setup_irq(irq, action);
+	retval = __setup_irq(irq, desc, action);
 	if (retval)
 		kfree(action);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index bc0993d86c8b..fac014a81b24 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -215,6 +215,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
 	if (action->dir) {
 		struct irq_desc *desc = irq_to_desc(irq);
+
 		remove_proc_entry(action->dir->name, desc->dir);
 	}
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index ec5a4bef3054..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -26,8 +26,7 @@ static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
 static int try_one_irq(int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
-	int ok = 0;
-	int work = 0;	/* Did we do work for a real IRQ */
+	int ok = 0, work = 0;
 
 	spin_lock(&desc->lock);
 	/* Already running on another processor */
@@ -88,9 +87,8 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 
 static int misrouted_irq(int irq)
 {
-	int i;
-	int ok = 0;
 	struct irq_desc *desc;
+	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
 		if (!i)
@@ -108,8 +106,8 @@ static int misrouted_irq(int irq)
 
 static void poll_spurious_irqs(unsigned long dummy)
 {
-	int i;
 	struct irq_desc *desc;
+	int i;
 
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
@@ -126,7 +124,8 @@ static void poll_spurious_irqs(unsigned long dummy)
 		try_one_irq(i, desc);
 	}
 
-	mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+	mod_timer(&poll_spurious_irq_timer,
+		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
 /*
@@ -177,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
 	}
 }
 
-static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+static inline int
+try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
+		  irqreturn_t action_ret)
 {
 	struct irqaction *action;
 
@@ -253,7 +254,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		desc->depth++;
 		desc->chip->disable(irq);
 
-		mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+		mod_timer(&poll_spurious_irq_timer,
+			  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 	}
 	desc->irqs_unhandled = 0;
 }
-- 
cgit v1.2.3-55-g7522


From c0c168ca26b54a4a6ad34fc813fe00f275fbc94c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 11:15:28 +0200
Subject: x86: cleanup show_interrupts

The sparseirq patches introduced some more ugliness in show_interrupts().
Clean it up all together and make the code easier to read by splitting out
the "tail" function  which prints the special interrupts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq_32.c | 160 +++++++++++++++++++++++------------------------
 arch/x86/kernel/irq_64.c | 153 ++++++++++++++++++++++----------------------
 2 files changed, 154 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index ccf6c1bf7120..8d525765a6c4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -263,22 +263,67 @@ atomic_t irq_err_count;
  * /proc/interrupts printing:
  */
 
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", nmi_count(j));
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
+	unsigned long flags, any_count = 0;
 	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-	unsigned int entries;
-	struct irq_desc *desc = NULL;
-	int tail = 0;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
 
-	entries = nr_irqs - 1;
-	i = *(loff_t *) v;
 	if (i == nr_irqs)
-		tail = 1;
-	else
-		desc = irq_to_desc(i);
+		return show_other_interrupts(p);
 
+	/* print header */
 	if (i == 0) {
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
@@ -286,88 +331,37 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i <= entries) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&desc->lock, flags);
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
+	any_count = kstat_irqs(i);
 #else
-		for_each_online_cpu(j)
-			any_count |= kstat_irqs_cpu(i, j);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
 #endif
-		action = desc->action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
+	seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %8s", desc->chip->name);
-		seq_printf(p, "-%-8s", desc->name);
-
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
 
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&desc->lock, flags);
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
 	}
 
-	if (tail) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", nmi_count(j));
-		seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				per_cpu(irq_stat,j).irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-#endif
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	}
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 21f53b911113..4f374294f292 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -68,22 +68,65 @@ static inline void stack_overflow_check(struct pt_regs *regs)
  * Generic, controller-independent functions:
  */
 
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+#endif
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i, j;
-	struct irqaction * action;
-	unsigned long flags;
-	unsigned int entries;
-	struct irq_desc *desc = NULL;
-	int tail = 0;
-
-	entries = nr_irqs - 1;
-	i = *(loff_t *) v;
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
 	if (i == nr_irqs)
-		tail = 1;
-	else
-		desc = irq_to_desc(i);
+		return show_other_interrupts(p);
 
+	/* print header */
 	if (i == 0) {
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
@@ -91,79 +134,37 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
-	if (i <= entries) {
-		unsigned any_count = 0;
-
-		spin_lock_irqsave(&desc->lock, flags);
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
-		any_count = kstat_irqs(i);
+	any_count = kstat_irqs(i);
 #else
-		for_each_online_cpu(j)
-			any_count |= kstat_irqs_cpu(i, j);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
 #endif
-		action = desc->action;
-		if (!action && !any_count)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
 #ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
+	seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %8s", desc->chip->name);
-		seq_printf(p, "-%-8s", desc->name);
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
 
-		if (action) {
-			seq_printf(p, "  %s", action->name);
-			while ((action = action->next) != NULL)
-				seq_printf(p, ", %s", action->name);
-		}
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
-
-	if (tail) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-		seq_printf(p, "  Non-maskable interrupts\n");
-		seq_printf(p, "LOC: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-		seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-		seq_printf(p, "RES: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-		seq_printf(p, "  Rescheduling interrupts\n");
-		seq_printf(p, "CAL: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  Function call interrupts\n");
-		seq_printf(p, "TLB: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-		seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-		seq_printf(p, "TRM: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-		seq_printf(p, "  Thermal event interrupts\n");
-		seq_printf(p, "THR: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-		seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-		seq_printf(p, "SPU: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-		seq_printf(p, "  Spurious interrupts\n");
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
 	}
 
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
-- 
cgit v1.2.3-55-g7522


From 6b39ba771e3c78d00e0abcebad270bd4212b28bc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 11:32:24 +0200
Subject: x86: unify show_interrupts() and proc helpers

show_interrupts() and proc helpers are basically the same for
32 and 64 bit. Move them to a shared source file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile |   2 +-
 arch/x86/kernel/irq.c    | 166 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/irq_32.c | 146 -----------------------------------------
 arch/x86/kernel/irq_64.c | 132 -------------------------------------
 4 files changed, 167 insertions(+), 279 deletions(-)
 create mode 100644 arch/x86/kernel/irq.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb6ade443f00..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y			+= traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..3b9128498976
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,166 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+
+atomic_t irq_err_count;
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x)		(&per_cpu(irq_stat,x))
+#else
+# define irq_stats(x)		cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p);
+
+	/* print header */
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%-8d",j);
+		seq_putc(p, '\n');
+	}
+
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+	any_count = kstat_irqs(i);
+#else
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+#endif
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+	seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
+	}
+
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	sum += irq_stats(cpu)->irq_resched_count;
+	sum += irq_stats(cpu)->irq_call_count;
+	sum += irq_stats(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+	sum += irq_stats(cpu)->irq_threshold_count;
+#endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+	sum += atomic_read(&irq_mis_count);
+#endif
+	return sum;
+}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 8d525765a6c4..6d9bf3936c78 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -253,152 +253,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
 	return 1;
 }
 
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-static int show_other_interrupts(struct seq_file *p)
-{
-	int j;
-
-	seq_printf(p, "NMI: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", nmi_count(j));
-	seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "LOC: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs);
-	seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-	seq_printf(p, "RES: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count);
-	seq_printf(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "CAL: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count);
-	seq_printf(p, "  Function call interrupts\n");
-	seq_printf(p, "TLB: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count);
-	seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-	seq_printf(p, "TRM: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count);
-	seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "SPU: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count);
-	seq_printf(p, "  Spurious interrupts\n");
-#endif
-	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-	return 0;
-}
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j;
-	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > nr_irqs)
-		return 0;
-
-	if (i == nr_irqs)
-		return show_other_interrupts(p);
-
-	/* print header */
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	desc = irq_to_desc(i);
-	spin_lock_irqsave(&desc->lock, flags);
-#ifndef CONFIG_SMP
-	any_count = kstat_irqs(i);
-#else
-	for_each_online_cpu(j)
-		any_count |= kstat_irqs_cpu(i, j);
-#endif
-	action = desc->action;
-	if (!action && !any_count)
-		goto out;
-
-	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-	seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-	seq_printf(p, " %8s", desc->chip->name);
-	seq_printf(p, "-%-8s", desc->name);
-
-	if (action) {
-		seq_printf(p, "  %s", action->name);
-		while ((action = action->next) != NULL)
-			seq_printf(p, ", %s", action->name);
-	}
-
-	seq_putc(p, '\n');
-out:
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = nmi_count(cpu);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
-#endif
-#ifdef CONFIG_SMP
-	sum += per_cpu(irq_stat, cpu).irq_resched_count;
-	sum += per_cpu(irq_stat, cpu).irq_call_count;
-	sum += per_cpu(irq_stat, cpu).irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += per_cpu(irq_stat, cpu).irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += per_cpu(irq_stat, cpu).irq_spurious_count;
-#endif
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
-	sum += atomic_read(&irq_mis_count);
-#endif
-	return sum;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4f374294f292..39ef7feb9ea4 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,8 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-atomic_t irq_err_count;
-
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -64,136 +62,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 }
 #endif
 
-/*
- * Generic, controller-independent functions:
- */
-
-static int show_other_interrupts(struct seq_file *p)
-{
-	int j;
-
-	seq_printf(p, "NMI: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-	seq_printf(p, "  Non-maskable interrupts\n");
-	seq_printf(p, "LOC: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-	seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-	seq_printf(p, "RES: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-	seq_printf(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "CAL: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-	seq_printf(p, "  Function call interrupts\n");
-	seq_printf(p, "TLB: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-	seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-	seq_printf(p, "TRM: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-	seq_printf(p, "  Thermal event interrupts\n");
-	seq_printf(p, "THR: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-	seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-	seq_printf(p, "SPU: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-
-	return 0;
-}
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j;
-	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > nr_irqs)
-		return 0;
-
-	if (i == nr_irqs)
-		return show_other_interrupts(p);
-
-	/* print header */
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d",j);
-		seq_putc(p, '\n');
-	}
-
-	desc = irq_to_desc(i);
-	spin_lock_irqsave(&desc->lock, flags);
-#ifndef CONFIG_SMP
-	any_count = kstat_irqs(i);
-#else
-	for_each_online_cpu(j)
-		any_count |= kstat_irqs_cpu(i, j);
-#endif
-	action = desc->action;
-	if (!action && !any_count)
-		goto out;
-
-	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-	seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-	seq_printf(p, " %8s", desc->chip->name);
-	seq_printf(p, "-%-8s", desc->name);
-
-	if (action) {
-		seq_printf(p, "  %s", action->name);
-		while ((action = action->next) != NULL)
-			seq_printf(p, ", %s", action->name);
-	}
-
-	seq_putc(p, '\n');
-out:
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-	u64 sum = cpu_pda(cpu)->__nmi_count;
-
-	sum += cpu_pda(cpu)->apic_timer_irqs;
-#ifdef CONFIG_SMP
-	sum += cpu_pda(cpu)->irq_resched_count;
-	sum += cpu_pda(cpu)->irq_call_count;
-	sum += cpu_pda(cpu)->irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += cpu_pda(cpu)->irq_thermal_count;
-	sum += cpu_pda(cpu)->irq_threshold_count;
-#endif
-	sum += cpu_pda(cpu)->irq_spurious_count;
-	return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-	return atomic_read(&irq_err_count);
-}
-
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
-- 
cgit v1.2.3-55-g7522


From 249f6d9eab372790579ada8991bba3384c204e06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 12:18:50 +0200
Subject: x86: move ack_bad_irq() to irq.c

Share more duplicated code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c    | 23 +++++++++++++++++++++++
 arch/x86/kernel/irq_32.c | 23 -----------------------
 arch/x86/kernel/irq_64.c | 20 --------------------
 3 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3b9128498976..ccf6c503fc3b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,29 @@
 
 atomic_t irq_err_count;
 
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 * But only ack when the APIC is enabled -AK
+	 */
+	if (cpu_has_apic)
+		ack_APIC_irq();
+#endif
+}
+
 #ifdef CONFIG_X86_32
 # define irq_stats(x)		(&per_cpu(irq_stat,x))
 #else
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6d9bf3936c78..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But only ack when the APIC is enabled -AK
-	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
 static int check_stack_overflow(void)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 39ef7feb9ea4..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,26 +18,6 @@
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
-	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But don't ack when the APIC is disabled. -AK
-	 */
-	if (!disable_apic)
-		ack_APIC_irq();
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:
-- 
cgit v1.2.3-55-g7522


From 811410fdb6b9d82a518542289efe9b2a51e3cbfb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 14:16:11 +0200
Subject: genirq: add reverse iterator for irq_desc

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 38bf89f2ade0..31632aa65d16 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -21,6 +21,10 @@ extern int nr_irqs;
 
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)			\
+	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
+	     irq > 0; irq--, desc--)
 #endif
 
 #ifndef CONFIG_S390
-- 
cgit v1.2.3-55-g7522


From 2be3b52a5785a6a5c5349fbd315f57595f7074be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 14:50:27 +0200
Subject: proc: fixup irq iterator

There is no need for irq_desc here. Even for sparse_irq we can
handle this clever in for_each_irq_nr().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/proc_misc.c | 7 ++-----
 include/linux/irq.h | 3 +++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3f5c7b9d1a70..97b4579134d5 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -509,9 +509,6 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum = 0;
 	struct timespec boottime;
 	unsigned int per_irq_sum;
-#ifdef CONFIG_GENERIC_HARDIRQS
-	struct irq_desc *desc;
-#endif
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -530,7 +527,7 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 
-		for_each_irq_desc(j, desc)
+		for_each_irq_nr(j)
 			sum += kstat_irqs_cpu(j, i);
 
 		sum += arch_irq_stat_cpu(i);
@@ -575,7 +572,7 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_desc(j, desc) {
+	for_each_irq_nr(j) {
 		per_irq_sum = 0;
 
 		for_each_possible_cpu(i)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 31632aa65d16..0618fb362cb4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -27,6 +27,9 @@ extern int nr_irqs;
 	     irq > 0; irq--, desc--)
 #endif
 
+#define for_each_irq_nr(irq)			\
+	for (irq = 0; irq < nr_irqs; irq++)
+
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
-- 
cgit v1.2.3-55-g7522


From 10e580842ec8e53dddf62e1ab1871f4906477376 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 14:19:04 +0200
Subject: genirq: use iterators for irq_desc loops

Use for_each_irq_desc[_reverse] for all the iteration loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/xen/events.c   | 14 ++++++--------
 kernel/irq/autoprobe.c | 45 ++++++++++++---------------------------------
 kernel/irq/handle.c    |  5 +++--
 3 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index e6d47e8ca1ac..9ce1ab6c268d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -137,14 +137,12 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 static void init_evtchn_cpu_bindings(void)
 {
 #ifdef CONFIG_SMP
+	struct irq_desc *desc;
 	int i;
+
 	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc)
 		desc->affinity = cpumask_of_cpu(0);
-	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -233,7 +231,7 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for (irq = 0; irq < nr_irqs; irq++)
+	for_each_irq_nr(irq)
 		if (irq_bindcount[irq] == 0)
 			break;
 
@@ -794,7 +792,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for (irq = 0; irq < nr_irqs; irq++)
+	for_each_irq_nr(irq)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -826,7 +824,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < nr_irqs; i++)
+	for_each_irq_nr(i)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index b3a5549ea81e..0cbff18efb6d 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -30,19 +30,16 @@ static DEFINE_MUTEX(probing_active);
 unsigned long probe_irq_on(void)
 {
 	struct irq_desc *desc;
-	unsigned long mask;
-	unsigned int i;
+	unsigned long mask = 0;
+	unsigned int status;
+	int i;
 
 	mutex_lock(&probing_active);
 	/*
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -70,11 +67,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = nr_irqs-1; i > 0; i--) {
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,13 +85,7 @@ unsigned long probe_irq_on(void)
 	/*
 	 * Now filter out any obviously spurious interrupts
 	 */
-	mask = 0;
-	for (i = 0; i < nr_irqs; i++) {
-		unsigned int status;
-
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -132,16 +119,11 @@ EXPORT_SYMBOL(probe_irq_on);
  */
 unsigned int probe_irq_mask(unsigned long val)
 {
-	unsigned int mask;
+	unsigned int status, mask = 0;
+	struct irq_desc *desc;
 	int i;
 
-	mask = 0;
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		unsigned int status;
-
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -180,13 +162,10 @@ EXPORT_SYMBOL(probe_irq_mask);
 int probe_irq_off(unsigned long val)
 {
 	int i, irq_found = 0, nr_irqs = 0;
+	struct irq_desc *desc;
+	unsigned int status;
 
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-		unsigned int status;
-
-		if (!desc)
-			continue;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a69368ff607f..c815b42d0f5b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -268,9 +268,10 @@ static struct lock_class_key irq_desc_lock_class;
 
 void early_init_irq_lock_class(void)
 {
+	struct irq_desc *desc;
 	int i;
 
-	for (i = 0; i < nr_irqs; i++)
-		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+	for_each_irq_desc(i, desc)
+		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 }
 #endif
-- 
cgit v1.2.3-55-g7522


From 63d659d556f145d33798b8ad19ced10c254fe445 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 15:33:18 +0200
Subject: genirq: fix name space collision of nr_irqs in autoprobe.c

probe_irq_off() is disfunctional as the local nr_irqs is referenced
instead of the global one for the for_each_irq_desc() iterator.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 kernel/irq/autoprobe.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 0cbff18efb6d..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(probe_irq_mask);
  */
 int probe_irq_off(unsigned long val)
 {
-	int i, irq_found = 0, nr_irqs = 0;
+	int i, irq_found = 0, nr_of_irqs = 0;
 	struct irq_desc *desc;
 	unsigned int status;
 
@@ -171,9 +171,9 @@ int probe_irq_off(unsigned long val)
 
 		if (status & IRQ_AUTODETECT) {
 			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
+				if (!nr_of_irqs)
 					irq_found = i;
-				nr_irqs++;
+				nr_of_irqs++;
 			}
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
@@ -182,7 +182,7 @@ int probe_irq_off(unsigned long val)
 	}
 	mutex_unlock(&probing_active);
 
-	if (nr_irqs > 1)
+	if (nr_of_irqs > 1)
 		irq_found = -irq_found;
 
 	return irq_found;
-- 
cgit v1.2.3-55-g7522


From 4b1135a277f4b38f60b9c9f28adae467feb07856 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 15:33:18 +0200
Subject: genirq: fix name space collisions of nr_irqs in arch/*

local shadows of global variables are _bad_

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/kernel/sys_sable.c    | 6 +++---
 arch/arm/mach-ixp2000/ixdp2x00.c | 4 ++--
 arch/arm/mach-omap2/irq.c        | 8 ++++----
 arch/avr32/mach-at32ap/extint.c  | 8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13a..a4555f497639 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
 	long i;
 
-	for (i = 0; i < nr_irqs; ++i) {
+	for (i = 0; i < nr_of_irqs; ++i) {
 		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
 		irq_desc[i].chip = &sable_lynx_irq_type;
 	}
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159a..30451300751b 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
 	.unmask	= ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
 	unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
 	board_irq_stat = stat_reg;
 	board_irq_mask = mask_reg;
-	board_irq_count = nr_irqs;
+	board_irq_count = nr_of_irqs;
 
 	*board_irq_mask = 0xffffffff;
 
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 196a9565a8dc..003901f1e2da 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -111,7 +111,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-	unsigned long nr_irqs = 0;
+	unsigned long nr_of_irqs = 0;
 	unsigned int nr_banks = 0;
 	int i;
 
@@ -124,14 +124,14 @@ void __init omap_init_irq(void)
 
 		omap_irq_bank_init_one(bank);
 
-		nr_irqs += bank->nr_irqs;
+		nr_of_irqs += bank->nr_irqs;
 		nr_banks++;
 	}
 
 	printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-	       nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+	       nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip(i, &omap_irq_chip);
 		set_irq_handler(i, handle_level_irq);
 		set_irq_flags(i, IRQF_VALID);
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f0..310477ba1bbf 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	struct eic *eic;
 	struct resource *regs;
 	unsigned int i;
-	unsigned int nr_irqs;
+	unsigned int nr_of_irqs;
 	unsigned int int_irq;
 	int ret;
 	u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	eic_writel(eic, IDR, ~0UL);
 	eic_writel(eic, MODE, ~0UL);
 	pattern = eic_readl(eic, MODE);
-	nr_irqs = fls(pattern);
+	nr_of_irqs = fls(pattern);
 
 	/* Trigger on low level unless overridden by driver */
 	eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
 	eic->chip = &eic_chip;
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
 					 handle_level_irq);
 		set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
 		 eic->regs, int_irq);
 	dev_info(&pdev->dev,
 		 "Handling %u external IRQs, starting with IRQ %u\n",
-		 nr_irqs, eic->first_irq);
+		 nr_of_irqs, eic->first_irq);
 
 	return 0;
 
-- 
cgit v1.2.3-55-g7522


From 10e0298686be6249b0665465737a6b83446c4d35 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 16 Oct 2008 17:04:22 +0200
Subject: io_apic: make irq_mis_count available on 64-bit too

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6f80dc2f137e..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2277,9 +2277,7 @@ static void ack_apic_edge(unsigned int irq)
 	ack_APIC_irq();
 }
 
-#ifdef CONFIG_X86_32
 atomic_t irq_mis_count;
-#endif
 
 static void ack_apic_level(unsigned int irq)
 {
-- 
cgit v1.2.3-55-g7522


From cc8e920aaf5558f87851169b33c420cc4516c253 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 16 Oct 2008 17:05:27 +0200
Subject: intr_remapping: fix typo

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/pci/intr_remapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 950769e87475..d6040dd3ff5b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -23,7 +23,7 @@ static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	return (irq < nr_irqs) ?: irq_2_iommuX + irq : NULL;
+	return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-- 
cgit v1.2.3-55-g7522


From 3baf63a507094992a5bf238ba3bcea71f458b1e8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 16 Oct 2008 19:00:57 +0200
Subject: m32r: fix build due to notify_cpu_starting() change

fix:

arch/m32r/kernel/smpboot.c:501: err
or: implicit declaration of function 'notify_cpu_starting'
make[2]: *** [arch/m32r/kernel/smpboot.o] Error 1
make[2]: *** Waiting for unfinished jobs....

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m32r/kernel/smpboot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f15..39cb6da72dcb 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-- 
cgit v1.2.3-55-g7522


From 0d4a7bc12ffecd3ba41dd94179cc5b272b71ce8a Mon Sep 17 00:00:00 2001
From: Jonathan Corbet
Date: Tue, 26 Aug 2008 17:15:45 -0600
Subject: UIO: BKL removal

Fill in needed locking around idr accesses, then remove the big kernel lock
from the UIO driver.  Since there are no in-tree UIO drivers with open()
methods, no further BKL pushdown is required.

Acked-by: Hans J. Koch <hjk@linutronix.de>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 drivers/uio/uio.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 3a6934bf7131..4f28f4bf8366 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -47,6 +47,9 @@ static struct uio_class {
 	struct class *class;
 } *uio_class;
 
+/* Protect idr accesses */
+static DEFINE_MUTEX(minor_lock);
+
 /*
  * attributes
  */
@@ -231,7 +234,6 @@ static void uio_dev_del_attributes(struct uio_device *idev)
 
 static int uio_get_minor(struct uio_device *idev)
 {
-	static DEFINE_MUTEX(minor_lock);
 	int retval = -ENOMEM;
 	int id;
 
@@ -253,7 +255,9 @@ exit:
 
 static void uio_free_minor(struct uio_device *idev)
 {
+	mutex_lock(&minor_lock);
 	idr_remove(&uio_idr, idev->minor);
+	mutex_unlock(&minor_lock);
 }
 
 /**
@@ -297,8 +301,9 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_listener *listener;
 	int ret = 0;
 
-	lock_kernel();
+	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
+	mutex_unlock(&minor_lock);
 	if (!idev) {
 		ret = -ENODEV;
 		goto out;
@@ -324,18 +329,15 @@ static int uio_open(struct inode *inode, struct file *filep)
 		if (ret)
 			goto err_infoopen;
 	}
-	unlock_kernel();
 	return 0;
 
 err_infoopen:
-
 	kfree(listener);
-err_alloc_listener:
 
+err_alloc_listener:
 	module_put(idev->owner);
 
 out:
-	unlock_kernel();
 	return ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From 3038edabf48f01421c621cb77a712b446d3a5d67 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Fri, 17 Oct 2008 01:26:27 +0200
Subject: x86 ACPI: fix breakage of resume on 64-bit UP systems with SMP kernel

x86 ACPI: Fix breakage of resume on 64-bit UP systems with SMP kernel

We are now using per CPU GDT tables in head_64.S and the original
early_gdt_descr.address is invalidated after boot by
setup_per_cpu_areas().  This breaks resume from suspend to RAM on
x86_64 UP systems using SMP kernels, because this part of head_64.S
is also executed during the resume and the invalid GDT address
causes the system to crash.  It doesn't break on 'true' SMP systems,
because early_gdt_descr.address is modified every time
native_cpu_up() runs.  However, during resume it should point to the
GDT of the boot CPU rather than to another CPU's GDT.

For this reason, during suspend to RAM always make
early_gdt_descr.address point to the boot CPU's GDT.

This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11568, which
is a regression from 2.6.26.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-and-tested-by: Andy Wettstein <ajw1980@gmail.com>
---
 arch/x86/kernel/acpi/sleep.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63a..c44cd6dbfa14 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
+#include <asm/desc.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
 	header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
 	stack_start.sp = temp_stack + 4096;
+	early_gdt_descr.address =
+			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
-- 
cgit v1.2.3-55-g7522


From 0a70c7f67a24b45e105ad10ac1d7e73fe50ec765 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: ide-disk: fix IDE_DFLAG_LBA48 handling on resume

Some code in idedisk_setup() should be in idedisk_capacity() instead.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3853bde8eedc..93ff15e6a9c4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -403,6 +403,26 @@ static void init_idedisk_capacity(ide_drive_t *drive)
 		if (ata_id_hpa_enabled(id))
 			idedisk_check_hpa(drive);
 	}
+
+	/* limit drive capacity to 137GB if LBA48 cannot be used */
+	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
+	    drive->capacity64 > 1ULL << 28) {
+		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
+		       "%llu sectors (%llu MB)\n",
+		       drive->name, (unsigned long long)drive->capacity64,
+		       sectors_to_MB(drive->capacity64));
+		drive->capacity64 = 1ULL << 28;
+	}
+
+	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
+	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
+		if (drive->capacity64 > 1ULL << 28) {
+			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
+					 " will be used for accessing sectors "
+					 "> %u\n", drive->name, 1 << 28);
+		} else
+			drive->dev_flags &= ~IDE_DFLAG_LBA48;
+	}
 }
 
 sector_t ide_disk_capacity(ide_drive_t *drive)
@@ -654,26 +674,6 @@ static void idedisk_setup(ide_drive_t *drive)
 	/* calculate drive capacity, and select LBA if possible */
 	init_idedisk_capacity(drive);
 
-	/* limit drive capacity to 137GB if LBA48 cannot be used */
-	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
-	    drive->capacity64 > 1ULL << 28) {
-		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
-		       "%llu sectors (%llu MB)\n",
-		       drive->name, (unsigned long long)drive->capacity64,
-		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
-	}
-
-	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
-	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		if (drive->capacity64 > 1ULL << 28) {
-			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
-					 " will be used for accessing sectors "
-					 "> %u\n", drive->name, 1 << 28);
-		} else
-			drive->dev_flags &= ~IDE_DFLAG_LBA48;
-	}
-
 	/*
 	 * if possible, give fdisk access to more of the drive,
 	 * by correcting bios_cyls:
-- 
cgit v1.2.3-55-g7522


From 099ed4c2f5d54a5e1e490250805fb9727d622c0c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: ide-disk: lock media before checking for media change

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 93ff15e6a9c4..2c48bd81f537 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -842,7 +842,6 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 	idkp->openers++;
 
 	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		check_disk_change(inode->i_bdev);
 		/*
 		 * Ignore the return code from door_lock,
 		 * since the open() has already succeeded,
@@ -851,6 +850,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
 		    idedisk_set_doorlock(drive, 1))
 			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+		check_disk_change(inode->i_bdev);
 	}
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From 9c3ba7692bc1166c6a5d06fb7c59984f9f988a00 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: ide-floppy: use alloc_disk_node()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index cf0aa25470ee..73458e46bf1e 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -788,7 +788,7 @@ static int ide_floppy_probe(ide_drive_t *drive)
 		goto failed;
 	}
 
-	g = alloc_disk(1 << PARTN_BITS);
+	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
 	if (!g)
 		goto out_free_floppy;
 
-- 
cgit v1.2.3-55-g7522


From d7e747596829c1c11833ca0a1f5e64f400d20bf2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:09 +0200
Subject: ide-disk: use to_ide_drv() and ide_drv_g()

There should be no functional changes caused by this patch.

Cc: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c       | 14 ++++++--------
 drivers/ide/ide-disk.h       |  3 ---
 drivers/ide/ide-disk_ioctl.c |  2 +-
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 2c48bd81f537..6c9c898aff62 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -49,8 +49,6 @@
 
 static DEFINE_MUTEX(idedisk_ref_mutex);
 
-#define to_ide_disk(obj) container_of(obj, struct ide_disk_obj, kref)
-
 static void ide_disk_release(struct kref *);
 
 static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
@@ -58,7 +56,7 @@ static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
 	struct ide_disk_obj *idkp = NULL;
 
 	mutex_lock(&idedisk_ref_mutex);
-	idkp = ide_disk_g(disk);
+	idkp = ide_drv_g(disk, ide_disk_obj);
 	if (idkp) {
 		if (ide_device_get(idkp->drive))
 			idkp = NULL;
@@ -746,7 +744,7 @@ static void ide_disk_remove(ide_drive_t *drive)
 
 static void ide_disk_release(struct kref *kref)
 {
-	struct ide_disk_obj *idkp = to_ide_disk(kref);
+	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 	struct gendisk *g = idkp->disk;
 
@@ -858,7 +856,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 static int idedisk_release(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	if (idkp->openers == 1)
@@ -879,7 +877,7 @@ static int idedisk_release(struct inode *inode, struct file *filp)
 
 static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	geo->heads = drive->bios_head;
@@ -890,7 +888,7 @@ static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 
 static int idedisk_media_changed(struct gendisk *disk)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
 	/* do not scan partitions twice if this is a removable device */
@@ -905,7 +903,7 @@ static int idedisk_media_changed(struct gendisk *disk)
 
 static int idedisk_revalidate_disk(struct gendisk *disk)
 {
-	struct ide_disk_obj *idkp = ide_disk_g(disk);
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	set_capacity(disk, ide_disk_capacity(idkp->drive));
 	return 0;
 }
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index a82fa4355665..50d674b91c8b 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -9,9 +9,6 @@ struct ide_disk_obj {
 	unsigned int	openers;	/* protected by BKL for now */
 };
 
-#define ide_disk_g(disk) \
-	container_of((disk)->private_data, struct ide_disk_obj, driver)
-
 /* ide-disk.c */
 sector_t ide_disk_capacity(ide_drive_t *);
 ide_decl_devset(address);
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index a6cf1a03a806..e6624eda9e69 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -17,7 +17,7 @@ int ide_disk_ioctl(struct inode *inode, struct file *file,
 		   unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 	int err;
 
-- 
cgit v1.2.3-55-g7522


From 81ee1bb51fff76aaa738668b92406b5117f125ed Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: ide-disk: move IDE_DFLAG_DOORLOCKING flag handling to
 idedisk_set_doorlock()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 6c9c898aff62..289a533afbd6 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -817,12 +817,21 @@ static ide_driver_t idedisk_driver = {
 static int idedisk_set_doorlock(ide_drive_t *drive, int on)
 {
 	ide_task_t task;
+	int ret;
+
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
+		return 0;
 
 	memset(&task, 0, sizeof(task));
 	task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
 	task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
 
-	return ide_no_data_taskfile(drive, &task);
+	ret = ide_no_data_taskfile(drive, &task);
+
+	if (ret)
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+
+	return ret;
 }
 
 static int idedisk_open(struct inode *inode, struct file *filp)
@@ -845,9 +854,7 @@ static int idedisk_open(struct inode *inode, struct file *filp)
 		 * since the open() has already succeeded,
 		 * and the door_lock is irrelevant at this point.
 		 */
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 1))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
+		idedisk_set_doorlock(drive, 1);
 		check_disk_change(inode->i_bdev);
 	}
 	return 0;
@@ -862,11 +869,8 @@ static int idedisk_release(struct inode *inode, struct file *filp)
 	if (idkp->openers == 1)
 		ide_cacheflush_p(drive);
 
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-		    idedisk_set_doorlock(drive, 0))
-			drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	}
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
+		idedisk_set_doorlock(drive, 0);
 
 	idkp->openers--;
 
-- 
cgit v1.2.3-55-g7522


From ae9f9f073963c56dcc4601ed9a0921eda1e8fa9d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: ide-{disk,floppy}: set IDE_DFLAG_ATTACH in *_setup()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c   | 18 ++++++++++--------
 drivers/ide/ide-floppy.c |  3 ++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 289a533afbd6..70b75f23a70e 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -716,6 +716,14 @@ static void idedisk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_WCACHE;
 
 	set_wcache(drive, 1);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
+	    (drive->head == 0 || drive->head > 16)) {
+		printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
+			drive->name, drive->head);
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+	} else
+		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
 static void ide_cacheflush_p(ide_drive_t *drive)
@@ -957,20 +965,14 @@ static int ide_disk_probe(ide_drive_t *drive)
 	drive->driver_data = idkp;
 
 	idedisk_setup(drive);
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
-	    (drive->head == 0 || drive->head > 16)) {
-		printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
-			drive->name, drive->head);
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-	} else
-		drive->dev_flags |= IDE_DFLAG_ATTACH;
+
+	set_capacity(g, ide_disk_capacity(drive));
 
 	g->minors = IDE_DISK_MINORS;
 	g->driverfs_dev = &drive->gendev;
 	g->flags |= GENHD_FL_EXT_DEVT;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
-	set_capacity(g, ide_disk_capacity(drive));
 	g->fops = &idedisk_ops;
 	add_disk(g);
 	return 0;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 73458e46bf1e..bcbd980f7a48 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -598,6 +598,8 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
 	(void) ide_floppy_get_capacity(drive);
 
 	ide_proc_register_driver(drive, floppy->driver);
+
+	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
 static void ide_floppy_remove(ide_drive_t *drive)
@@ -807,7 +809,6 @@ static int ide_floppy_probe(ide_drive_t *drive)
 	drive->debug_mask = debug_mask;
 
 	idefloppy_setup(drive, floppy);
-	drive->dev_flags |= IDE_DFLAG_ATTACH;
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
-- 
cgit v1.2.3-55-g7522


From 8f29cd9f12e97d46e601f59810ea6aadd938945d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: ide-floppy: drop 'floppy' argument from idefloppy_setup()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index bcbd980f7a48..4a3d7e08b65a 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -560,8 +560,9 @@ sector_t ide_floppy_capacity(ide_drive_t *drive)
 	return capacity;
 }
 
-static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
+static void idefloppy_setup(ide_drive_t *drive)
 {
+	struct ide_floppy_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
 
 	drive->pc_callback	 = ide_floppy_callback;
@@ -808,7 +809,7 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	drive->debug_mask = debug_mask;
 
-	idefloppy_setup(drive, floppy);
+	idefloppy_setup(drive);
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
-- 
cgit v1.2.3-55-g7522


From 6f84083bbb7d206c8555e5834a2c9b887452fd54 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:10 +0200
Subject: ide-floppy: use drive->capacity64 for caching current capacity

* Use drive->capacity64 for caching current capacity.

* Switch ide_floppy_capacity() to use drive->capacity64.

* Call set_capacity() in idefloppy_open() and ide_floppy_probe()
  instead of ide_floppy_get_capacity().

There should be no functional changes caused by this patch.

Cc: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 4a3d7e08b65a..3271e56d091c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -445,7 +445,9 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 			drive->name, lba_capacity, capacity);
 		floppy->blocks = floppy->block_size ?
 			capacity / floppy->block_size : 0;
+		drive->capacity64 = floppy->blocks * floppy->bs_factor;
 	}
+
 	return 0;
 }
 
@@ -466,7 +468,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	drive->bios_head = drive->bios_sect = 0;
 	floppy->blocks = 0;
 	floppy->bs_factor = 1;
-	set_capacity(floppy->disk, 0);
+	drive->capacity64 = 0;
 
 	ide_floppy_create_read_capacity_cmd(&pc);
 	if (ide_queue_pc_tail(drive, disk, &pc)) {
@@ -523,6 +525,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 					       "non 512 bytes block size not "
 					       "fully supported\n",
 					       drive->name);
+				drive->capacity64 =
+					floppy->blocks * floppy->bs_factor;
 				rc = 0;
 			}
 			break;
@@ -547,17 +551,12 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
 		(void) ide_floppy_get_flexible_disk_page(drive);
 
-	set_capacity(disk, floppy->blocks * floppy->bs_factor);
-
 	return rc;
 }
 
 sector_t ide_floppy_capacity(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	unsigned long capacity = floppy->blocks * floppy->bs_factor;
-
-	return capacity;
+	return drive->capacity64;
 }
 
 static void idefloppy_setup(ide_drive_t *drive)
@@ -671,14 +670,16 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		if (ide_do_test_unit_ready(drive, disk))
 			ide_do_start_stop(drive, disk, 1);
 
-		if (ide_floppy_get_capacity(drive)
-		   && (filp->f_flags & O_NDELAY) == 0
+		ret = ide_floppy_get_capacity(drive);
+
+		set_capacity(disk, ide_floppy_capacity(drive));
+
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
 		    /*
 		     * Allow O_NDELAY to open a drive without a disk, or with an
 		     * unreadable disk, so that we can get the format capacity
 		     * of the drive or begin the format - Sam
 		     */
-		    ) {
 			ret = -EIO;
 			goto out_put_floppy;
 		}
@@ -811,6 +812,8 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	idefloppy_setup(drive);
 
+	set_capacity(g, ide_floppy_capacity(drive));
+
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-- 
cgit v1.2.3-55-g7522


From fe11edfaabf1787c05d782a7b33e6497d1118b1d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: ide: IDE_AFLAG_MEDIA_CHANGED -> IDE_DFLAG_MEDIA_CHANGED

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c       | 6 +++---
 drivers/ide/ide-cd_ioctl.c | 4 ++--
 drivers/ide/ide-floppy.c   | 6 +++---
 include/linux/ide.h        | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3308b1cd3a33..7dc1a17a4dd8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -99,7 +99,7 @@ static void ide_cd_put(struct cdrom_info *cd)
 /* Mark that we've seen a media change and invalidate our internal buffers. */
 static void cdrom_saw_media_change(ide_drive_t *drive)
 {
-	drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 	drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
 }
 
@@ -1986,8 +1986,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 	if (!drive->queue->unplug_delay)
 		drive->queue->unplug_delay = 1;
 
-	drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT |
-		       ide_cd_flags(id);
+	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+	drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
 	if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
 	    fw_rev[4] == '1' && fw_rev[6] <= '2')
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 74231b41f611..37d89ead13dd 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -86,8 +86,8 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
 
 	if (slot_nr == CDSL_CURRENT) {
 		(void) cdrom_check_status(drive, NULL);
-		retval = (drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED) ? 1 : 0;
-		drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+		retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
+		drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 		return retval;
 	} else {
 		return -EINVAL;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3271e56d091c..df410c7191ac 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -689,8 +689,8 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 			goto out_put_floppy;
 		}
 
-		drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
 		ide_set_media_lock(drive, disk, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
 	} else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
@@ -747,8 +747,8 @@ static int idefloppy_media_changed(struct gendisk *disk)
 		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
 		return 0;
 	}
-	ret = !!(drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED);
-	drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 	return ret;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c47e371554c1..155a57f55c60 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -464,7 +464,6 @@ struct ide_acpi_hwif_link;
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
-	IDE_AFLAG_MEDIA_CHANGED		= (1 << 1),
 	/* Drive cannot lock the door. */
 	IDE_AFLAG_NO_DOORLOCK		= (1 << 2),
 
@@ -578,7 +577,8 @@ enum {
 	/* don't unload heads */
 	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28)
+	IDE_DFLAG_PARKED		= (1 << 28),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
 };
 
 struct ide_drive_s {
-- 
cgit v1.2.3-55-g7522


From da167876bd0f71f1c646e5dd98997544d8d90e8e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: ide: IDE_AFLAG_WP -> IDE_DFLAG_WP

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c | 8 ++++----
 include/linux/ide.h      | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index df410c7191ac..8078e0826cd3 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -410,11 +410,11 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 	}
 
 	if (pc.buf[3] & 0x80)
-		drive->atapi_flags |= IDE_AFLAG_WP;
+		drive->dev_flags |= IDE_DFLAG_WP;
 	else
-		drive->atapi_flags &= ~IDE_AFLAG_WP;
+		drive->dev_flags &= ~IDE_DFLAG_WP;
 
-	set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP));
+	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
 	page = &pc.buf[8];
 
@@ -684,7 +684,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 			goto out_put_floppy;
 		}
 
-		if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
 			ret = -EROFS;
 			goto out_put_floppy;
 		}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 155a57f55c60..bd0a4d36b6d3 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -503,8 +503,6 @@ enum {
 	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
 	/* Requires BH algorithm for packets */
 	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
-	/* Write protect */
-	IDE_AFLAG_WP			= (1 << 21),
 	/* Supports format progress report */
 	IDE_AFLAG_SRFP			= (1 << 22),
 
@@ -579,6 +577,8 @@ enum {
 	/* heads unloaded, please don't reset port */
 	IDE_DFLAG_PARKED		= (1 << 28),
 	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	/* write protect */
+	IDE_DFLAG_WP			= (1 << 30),
 };
 
 struct ide_drive_s {
-- 
cgit v1.2.3-55-g7522


From e01286282eef85e4783b06fb2e0ed84fc111eb32 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: ide: IDE_AFLAG_FORMAT_IN_PROGRESS -> IDE_DFLAG_FORMAT_IN_PROGRESS

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c       | 6 +++---
 drivers/ide/ide-floppy_ioctl.c | 6 +++---
 include/linux/ide.h            | 3 +--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8078e0826cd3..2cf98b531fd9 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -664,7 +664,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 	floppy->openers++;
 
 	if (floppy->openers == 1) {
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		/* Just in case */
 
 		if (ide_do_test_unit_ready(drive, disk))
@@ -692,7 +692,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		ide_set_media_lock(drive, disk, 1);
 		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
-	} else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
 		goto out_put_floppy;
 	}
@@ -714,7 +714,7 @@ static int idefloppy_release(struct inode *inode, struct file *filp)
 
 	if (floppy->openers == 1) {
 		ide_set_media_lock(drive, disk, 0);
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	}
 
 	floppy->openers--;
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index a3a7a0809e2b..b1f391df6cca 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -138,11 +138,11 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 	if (floppy->openers > 1) {
 		/* Don't format if someone is using the disk */
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		return -EBUSY;
 	}
 
-	drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
+	drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
 
 	/*
 	 * Send ATAPI_FORMAT_UNIT to the drive.
@@ -174,7 +174,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 out:
 	if (err)
-		drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	return err;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index bd0a4d36b6d3..d111c3ebbbae 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -497,8 +497,6 @@ enum {
 	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
 
 	/* ide-floppy */
-	/* Format in progress */
-	IDE_AFLAG_FORMAT_IN_PROGRESS	= (1 << 18),
 	/* Avoid commands not supported in Clik drive */
 	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
 	/* Requires BH algorithm for packets */
@@ -579,6 +577,7 @@ enum {
 	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 30),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
 };
 
 struct ide_drive_s {
-- 
cgit v1.2.3-55-g7522


From 42619d35c7af2f88cad56425fe3981f1f65ff0bd Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:11 +0200
Subject: ide: remove IDE_AFLAG_NO_DOORLOCKING

Just use IDE_DFLAG_DOORLOCKING instead.

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c    | 2 +-
 drivers/ide/ide-cd.c       | 2 +-
 drivers/ide/ide-cd_ioctl.c | 4 ++--
 drivers/ide/ide-floppy.c   | 2 +-
 drivers/ide/ide-probe.c    | 1 +
 drivers/ide/ide-tape.c     | 2 +-
 include/linux/ide.h        | 2 --
 7 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2e305714c209..4e58b9e7a58a 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,7 +191,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 {
 	struct ide_atapi_pc pc;
 
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK)
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
 		return 0;
 
 	ide_init_pc(&pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7dc1a17a4dd8..2f4cc10391e5 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1732,7 +1732,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 		return 0;
 
 	if ((buf[8 + 6] & 0x01) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	if (buf[8 + 6] & 0x08)
 		drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
 	if (buf[8 + 3] & 0x01)
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 37d89ead13dd..df3df0041eb6 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -136,7 +136,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 		sense = &my_sense;
 
 	/* If the drive cannot lock the door, just pretend. */
-	if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) {
+	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
 		stat = 0;
 	} else {
 		unsigned char cmd[BLK_MAX_CDB];
@@ -157,7 +157,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 	    (sense->asc == 0x24 || sense->asc == 0x20)) {
 		printk(KERN_ERR "%s: door locking not supported\n",
 			drive->name);
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 		stat = 0;
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2cf98b531fd9..791a9d6f371c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -592,7 +592,7 @@ static void idefloppy_setup(ide_drive_t *drive)
 		blk_queue_max_sectors(drive->queue, 64);
 		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
 		/* IOMEGA Clik! drives do not support lock/unlock commands */
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 	}
 
 	(void) ide_floppy_get_capacity(drive);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 19f8c7770a25..1649ea54f76c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -208,6 +208,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 		drive->ready_stat = 0;
 		if (ata_id_cdb_intr(id))
 			drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+		drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
 		/* we don't do head unloading on ATAPI devices */
 		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
 		return;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d879c7797cde..a99e28f45156 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2108,7 +2108,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 
 	/* device lacks locking support according to capabilities page */
 	if ((caps[6] & 1) == 0)
-		drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
 	if (caps[7] & 0x02)
 		tape->blk_size = 512;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index d111c3ebbbae..ba51a93fa547 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -464,8 +464,6 @@ struct ide_acpi_hwif_link;
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
-	/* Drive cannot lock the door. */
-	IDE_AFLAG_NO_DOORLOCK		= (1 << 2),
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
-- 
cgit v1.2.3-55-g7522


From 5fef0e5c0283949f95a7891c9424a9f84448116b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:12 +0200
Subject: ide-disk: factor out generic disk handling code to ide-gd.c

While at it:
- IDEDISK_VERSION -> IDE_GD_VERSION
- ide_cacheflush_p() -> ide_disk_flush()
- init_idedisk_capacity() -> ide_disk_init_capacity()
- idedisk_set_doorlock() -> ide_disk_set_doorlock()
- idedisk_setup() -> ide_disk_setup()

- ide_disk_capacity() -> ide_gd_capacity()
- ide_disk_remove() -> ide_gd_remove()
- ide_disk_probe() -> ide_gd_probe()
- ide_disk_resume() -> ide_gd_resume()
- ide_device_shutdown() -> ide_gd_shutdown()
- idedisk_driver -> ide_gd_driver
- idedisk_open() -> ide_gd_open()
- idedisk_release() -> ide_gd_release()
- idedisk_getgeo() -> ide_gd_getgeo()
- idedisk_media_changed() -> ide_gd_media_changed()
- idedisk_revalidate_disk() -> ide_gd_revalidate_disk()
- idedisk_ops -> ide_gd_ops
- idedisk_init() -> ide_gd_init()
- idedisk_exit() -> ide_gd_exit()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile        |   2 +-
 drivers/ide/ide-disk.c      | 305 ++------------------------------------------
 drivers/ide/ide-disk.h      |   8 +-
 drivers/ide/ide-disk_proc.c |   2 +-
 drivers/ide/ide-gd.c        | 296 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 314 insertions(+), 299 deletions(-)
 create mode 100644 drivers/ide/ide-gd.c

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index ceaf779054ea..8aad9395a427 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,7 +37,7 @@ obj-$(CONFIG_IDE_H8300)			+= h8300/
 obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
-ide-disk_mod-y += ide-disk.o ide-disk_ioctl.o
+ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
 ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 70b75f23a70e..751be7af22c2 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -14,9 +14,6 @@
  * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
  */
 
-#define IDEDISK_VERSION	"1.18"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -39,44 +36,8 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 
-#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
-#define IDE_DISK_MINORS		(1 << PARTN_BITS)
-#else
-#define IDE_DISK_MINORS		0
-#endif
-
 #include "ide-disk.h"
 
-static DEFINE_MUTEX(idedisk_ref_mutex);
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = NULL;
-
-	mutex_lock(&idedisk_ref_mutex);
-	idkp = ide_drv_g(disk, ide_disk_obj);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			kref_get(&idkp->kref);
-	}
-	mutex_unlock(&idedisk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_disk_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&idedisk_ref_mutex);
-	kref_put(&idkp->kref, ide_disk_release);
-	ide_device_put(drive);
-	mutex_unlock(&idedisk_ref_mutex);
-}
-
 static const u8 ide_rw_cmds[] = {
 	ATA_CMD_READ_MULTI,
 	ATA_CMD_WRITE_MULTI,
@@ -223,8 +184,8 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
  * 1073741822 == 549756 MB or 48bit addressing fake drive
  */
 
-static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-				      sector_t block)
+ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
+			       sector_t block)
 {
 	ide_hwif_t *hwif = HWIF(drive);
 
@@ -372,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 	}
 }
 
-static void init_idedisk_capacity(ide_drive_t *drive)
+void ide_disk_init_capacity(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int lba;
@@ -423,11 +384,6 @@ static void init_idedisk_capacity(ide_drive_t *drive)
 	}
 }
 
-sector_t ide_disk_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -526,7 +482,7 @@ static void update_ordered(ide_drive_t *drive)
 		 * time we have trimmed the drive capacity if LBA48 is
 		 * not available so we don't need to recheck that.
 		 */
-		capacity = ide_disk_capacity(drive);
+		capacity = ide_gd_capacity(drive);
 		barrier = ata_id_flush_enabled(id) &&
 			(drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
 			((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
@@ -634,7 +590,7 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-static void idedisk_setup(ide_drive_t *drive)
+void ide_disk_setup(ide_drive_t *drive)
 {
 	struct ide_disk_obj *idkp = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -670,13 +626,13 @@ static void idedisk_setup(ide_drive_t *drive)
 			 drive->queue->max_sectors / 2);
 
 	/* calculate drive capacity, and select LBA if possible */
-	init_idedisk_capacity(drive);
+	ide_disk_init_capacity(drive);
 
 	/*
 	 * if possible, give fdisk access to more of the drive,
 	 * by correcting bios_cyls:
 	 */
-	capacity = ide_disk_capacity(drive);
+	capacity = ide_gd_capacity(drive);
 
 	if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
 		if (ata_id_lba48_enabled(drive->id)) {
@@ -726,7 +682,7 @@ static void idedisk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void ide_cacheflush_p(ide_drive_t *drive)
+void ide_disk_flush(ide_drive_t *drive)
 {
 	if (ata_id_flush_enabled(drive->id) == 0 ||
 	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -736,93 +692,7 @@ static void ide_cacheflush_p(ide_drive_t *drive)
 		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-static void ide_disk_remove(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-
-	del_gendisk(g);
-
-	ide_cacheflush_p(drive);
-
-	ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-static int ide_disk_probe(ide_drive_t *drive);
-
-/*
- * On HPA drives the capacity needs to be
- * reinitilized on resume otherwise the disk
- * can not be used and a hard reset is required
- */
-static void ide_disk_resume(ide_drive_t *drive)
-{
-	if (ata_id_hpa_enabled(drive->id))
-		init_idedisk_capacity(drive);
-}
-
-static void ide_device_shutdown(ide_drive_t *drive)
-{
-#ifdef	CONFIG_ALPHA
-	/* On Alpha, halt(8) doesn't actually turn the machine off,
-	   it puts you into the sort of firmware monitor. Typically,
-	   it's used to boot another kernel image, so it's not much
-	   different from reboot(8). Therefore, we don't need to
-	   spin down the disk in this case, especially since Alpha
-	   firmware doesn't handle disks in standby mode properly.
-	   On the other hand, it's reasonably safe to turn the power
-	   off when the shutdown process reaches the firmware prompt,
-	   as the firmware initialization takes rather long time -
-	   at least 10 seconds, which should be sufficient for
-	   the disk to expire its write cache. */
-	if (system_state != SYSTEM_POWER_OFF) {
-#else
-	if (system_state == SYSTEM_RESTART) {
-#endif
-		ide_cacheflush_p(drive);
-		return;
-	}
-
-	printk(KERN_INFO "Shutdown: %s\n", drive->name);
-
-	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
-}
-
-static ide_driver_t idedisk_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-disk",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_disk_probe,
-	.remove			= ide_disk_remove,
-	.resume			= ide_disk_resume,
-	.shutdown		= ide_device_shutdown,
-	.version		= IDEDISK_VERSION,
-	.do_request		= ide_do_rw_disk,
-	.end_request		= ide_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_disk_proc,
-	.settings		= ide_disk_settings,
-#endif
-};
-
-static int idedisk_set_doorlock(ide_drive_t *drive, int on)
+int ide_disk_set_doorlock(ide_drive_t *drive, int on)
 {
 	ide_task_t task;
 	int ret;
@@ -841,160 +711,3 @@ static int idedisk_set_doorlock(ide_drive_t *drive, int on)
 
 	return ret;
 }
-
-static int idedisk_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp;
-	ide_drive_t *drive;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	idkp->openers++;
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		/*
-		 * Ignore the return code from door_lock,
-		 * since the open() has already succeeded,
-		 * and the door_lock is irrelevant at this point.
-		 */
-		idedisk_set_doorlock(drive, 1);
-		check_disk_change(inode->i_bdev);
-	}
-	return 0;
-}
-
-static int idedisk_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	if (idkp->openers == 1)
-		ide_cacheflush_p(drive);
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
-		idedisk_set_doorlock(drive, 0);
-
-	idkp->openers--;
-
-	ide_disk_put(idkp);
-
-	return 0;
-}
-
-static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int idedisk_media_changed(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-
-	/* if removable, always assume it was changed */
-	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
-}
-
-static int idedisk_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	set_capacity(disk, ide_disk_capacity(idkp->drive));
-	return 0;
-}
-
-static struct block_device_operations idedisk_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idedisk_open,
-	.release		= idedisk_release,
-	.ioctl			= ide_disk_ioctl,
-	.getgeo			= idedisk_getgeo,
-	.media_changed		= idedisk_media_changed,
-	.revalidate_disk	= idedisk_revalidate_disk
-};
-
-MODULE_DESCRIPTION("ATA DISK Driver");
-
-static int ide_disk_probe(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp;
-	struct gendisk *g;
-
-	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-disk", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_disk)
-		goto failed;
-
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp)
-		goto failed;
-
-	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&idkp->kref);
-
-	idkp->drive = drive;
-	idkp->driver = &idedisk_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-
-	idedisk_setup(drive);
-
-	set_capacity(g, ide_disk_capacity(drive));
-
-	g->minors = IDE_DISK_MINORS;
-	g->driverfs_dev = &drive->gendev;
-	g->flags |= GENHD_FL_EXT_DEVT;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idedisk_ops;
-	add_disk(g);
-	return 0;
-
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idedisk_exit(void)
-{
-	driver_unregister(&idedisk_driver.gen_driver);
-}
-
-static int __init idedisk_init(void)
-{
-	return driver_register(&idedisk_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-disk*");
-MODULE_ALIAS("ide-disk");
-module_init(idedisk_init);
-module_exit(idedisk_exit);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index 50d674b91c8b..104ad71288a5 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -9,8 +9,14 @@ struct ide_disk_obj {
 	unsigned int	openers;	/* protected by BKL for now */
 };
 
+sector_t ide_gd_capacity(ide_drive_t *);
+
 /* ide-disk.c */
-sector_t ide_disk_capacity(ide_drive_t *);
+void ide_disk_init_capacity(ide_drive_t *);
+void ide_disk_setup(ide_drive_t *);
+void ide_disk_flush(ide_drive_t *);
+int ide_disk_set_doorlock(ide_drive_t *, int);
+ide_startstop_t ide_do_rw_disk(ide_drive_t *, struct request *, sector_t);
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 4724976afe71..1146f4204c6e 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -56,7 +56,7 @@ static int proc_idedisk_read_capacity
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_disk_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
new file mode 100644
index 000000000000..84bbcfee9233
--- /dev/null
+++ b/drivers/ide/ide-gd.c
@@ -0,0 +1,296 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS		(1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS		0
+#endif
+
+#include "ide-disk.h"
+
+#define IDE_GD_VERSION	"1.18"
+
+static DEFINE_MUTEX(ide_disk_ref_mutex);
+
+static void ide_disk_release(struct kref *);
+
+static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = NULL;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	idkp = ide_drv_g(disk, ide_disk_obj);
+	if (idkp) {
+		if (ide_device_get(idkp->drive))
+			idkp = NULL;
+		else
+			kref_get(&idkp->kref);
+	}
+	mutex_unlock(&ide_disk_ref_mutex);
+	return idkp;
+}
+
+static void ide_disk_put(struct ide_disk_obj *idkp)
+{
+	ide_drive_t *drive = idkp->drive;
+
+	mutex_lock(&ide_disk_ref_mutex);
+	kref_put(&idkp->kref, ide_disk_release);
+	ide_device_put(drive);
+	mutex_unlock(&ide_disk_ref_mutex);
+}
+
+sector_t ide_gd_capacity(ide_drive_t *drive)
+{
+	return drive->capacity64;
+}
+
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
+{
+	struct ide_disk_obj *idkp = drive->driver_data;
+	struct gendisk *g = idkp->disk;
+
+	ide_proc_unregister_driver(drive, idkp->driver);
+
+	del_gendisk(g);
+
+	ide_disk_flush(drive);
+
+	ide_disk_put(idkp);
+}
+
+static void ide_disk_release(struct kref *kref)
+{
+	struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	struct gendisk *g = idkp->disk;
+
+	drive->driver_data = NULL;
+	g->private_data = NULL;
+	put_disk(g);
+	kfree(idkp);
+}
+
+/*
+ * On HPA drives the capacity needs to be
+ * reinitilized on resume otherwise the disk
+ * can not be used and a hard reset is required
+ */
+static void ide_gd_resume(ide_drive_t *drive)
+{
+	if (ata_id_hpa_enabled(drive->id))
+		ide_disk_init_capacity(drive);
+}
+
+static void ide_gd_shutdown(ide_drive_t *drive)
+{
+#ifdef	CONFIG_ALPHA
+	/* On Alpha, halt(8) doesn't actually turn the machine off,
+	   it puts you into the sort of firmware monitor. Typically,
+	   it's used to boot another kernel image, so it's not much
+	   different from reboot(8). Therefore, we don't need to
+	   spin down the disk in this case, especially since Alpha
+	   firmware doesn't handle disks in standby mode properly.
+	   On the other hand, it's reasonably safe to turn the power
+	   off when the shutdown process reaches the firmware prompt,
+	   as the firmware initialization takes rather long time -
+	   at least 10 seconds, which should be sufficient for
+	   the disk to expire its write cache. */
+	if (system_state != SYSTEM_POWER_OFF) {
+#else
+	if (system_state == SYSTEM_RESTART) {
+#endif
+		ide_disk_flush(drive);
+		return;
+	}
+
+	printk(KERN_INFO "Shutdown: %s\n", drive->name);
+
+	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+}
+
+static ide_driver_t ide_gd_driver = {
+	.gen_driver = {
+		.owner		= THIS_MODULE,
+		.name		= "ide-disk",
+		.bus		= &ide_bus_type,
+	},
+	.probe			= ide_gd_probe,
+	.remove			= ide_gd_remove,
+	.resume			= ide_gd_resume,
+	.shutdown		= ide_gd_shutdown,
+	.version		= IDE_GD_VERSION,
+	.do_request		= ide_do_rw_disk,
+	.end_request		= ide_end_request,
+	.error			= __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+	.proc			= ide_disk_proc,
+	.settings		= ide_disk_settings,
+#endif
+};
+
+static int ide_gd_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp;
+	ide_drive_t *drive;
+
+	idkp = ide_disk_get(disk);
+	if (idkp == NULL)
+		return -ENXIO;
+
+	drive = idkp->drive;
+
+	idkp->openers++;
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		/*
+		 * Ignore the return code from door_lock,
+		 * since the open() has already succeeded,
+		 * and the door_lock is irrelevant at this point.
+		 */
+		ide_disk_set_doorlock(drive, 1);
+		check_disk_change(inode->i_bdev);
+	}
+	return 0;
+}
+
+static int ide_gd_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	if (idkp->openers == 1)
+		ide_disk_flush(drive);
+
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
+		ide_disk_set_doorlock(drive, 0);
+
+	idkp->openers--;
+
+	ide_disk_put(idkp);
+
+	return 0;
+}
+
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
+static int ide_gd_media_changed(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	/* do not scan partitions twice if this is a removable device */
+	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+		return 0;
+	}
+
+	/* if removable, always assume it was changed */
+	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
+}
+
+static int ide_gd_revalidate_disk(struct gendisk *disk)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	set_capacity(disk, ide_gd_capacity(idkp->drive));
+	return 0;
+}
+
+static struct block_device_operations ide_gd_ops = {
+	.owner			= THIS_MODULE,
+	.open			= ide_gd_open,
+	.release		= ide_gd_release,
+	.ioctl			= ide_disk_ioctl,
+	.getgeo			= ide_gd_getgeo,
+	.media_changed		= ide_gd_media_changed,
+	.revalidate_disk	= ide_gd_revalidate_disk
+};
+
+static int ide_gd_probe(ide_drive_t *drive)
+{
+	struct ide_disk_obj *idkp;
+	struct gendisk *g;
+
+	/* strstr("foo", "") is non-NULL */
+	if (!strstr("ide-disk", drive->driver_req))
+		goto failed;
+
+	if (drive->media != ide_disk)
+		goto failed;
+
+	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+	if (!idkp)
+		goto failed;
+
+	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
+	if (!g)
+		goto out_free_idkp;
+
+	ide_init_disk(g, drive);
+
+	kref_init(&idkp->kref);
+
+	idkp->drive = drive;
+	idkp->driver = &ide_gd_driver;
+	idkp->disk = g;
+
+	g->private_data = &idkp->driver;
+
+	drive->driver_data = idkp;
+
+	ide_disk_setup(drive);
+
+	set_capacity(g, ide_gd_capacity(drive));
+
+	g->minors = IDE_DISK_MINORS;
+	g->driverfs_dev = &drive->gendev;
+	g->flags |= GENHD_FL_EXT_DEVT;
+	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+		g->flags = GENHD_FL_REMOVABLE;
+	g->fops = &ide_gd_ops;
+	add_disk(g);
+	return 0;
+
+out_free_idkp:
+	kfree(idkp);
+failed:
+	return -ENODEV;
+}
+
+static int __init ide_gd_init(void)
+{
+	return driver_register(&ide_gd_driver.gen_driver);
+}
+
+static void __exit ide_gd_exit(void)
+{
+	driver_unregister(&ide_gd_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-disk*");
+MODULE_ALIAS("ide-disk");
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ATA DISK Driver");
-- 
cgit v1.2.3-55-g7522


From cedd120cac61fa149ba215eabc57b2578068be00 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:12 +0200
Subject: ide-disk: use IDE_DFLAG_MEDIA_CHANGED

Set IDE_DFLAG_MEDIA_CHANGED in ide_gd_open() to signalize
ide_gd_media_changed() that that media has changed (instead
of relying on IDE_DFLAG_REMOVABLE).

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-gd.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 84bbcfee9233..c08500270b9d 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -160,6 +160,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 		 * and the door_lock is irrelevant at this point.
 		 */
 		ide_disk_set_doorlock(drive, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
 	}
 	return 0;
@@ -199,6 +200,7 @@ static int ide_gd_media_changed(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
+	int ret;
 
 	/* do not scan partitions twice if this is a removable device */
 	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
@@ -206,8 +208,10 @@ static int ide_gd_media_changed(struct gendisk *disk)
 		return 0;
 	}
 
-	/* if removable, always assume it was changed */
-	return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+	return ret;
 }
 
 static int ide_gd_revalidate_disk(struct gendisk *disk)
-- 
cgit v1.2.3-55-g7522


From c84d9bbe7c77aea7e1194da056d44a2ed982e72b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: ide-floppy: factor out generic disk handling code to ide-gd-floppy.c

While at it:
- idefloppy_do_request() -> ide_floppy_do_request()
- idefloppy_end_request() -> ide_floppy_end_request()
- idefloppy_setup() -> ide_floppy_setup()

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile        |   2 +-
 drivers/ide/ide-floppy.c    | 319 ++------------------------------------------
 drivers/ide/ide-floppy.h    |  19 ++-
 drivers/ide/ide-gd-floppy.c | 298 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 327 insertions(+), 311 deletions(-)
 create mode 100644 drivers/ide/ide-gd-floppy.c

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 8aad9395a427..7eeeab597959 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
 ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ide-floppy_mod-y += ide-gd-floppy.o ide-floppy.o ide-floppy_ioctl.o
 
 ifeq ($(CONFIG_IDE_PROC_FS), y)
 	ide-disk_mod-y += ide-disk_proc.o
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 791a9d6f371c..802e0968e32f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -15,12 +15,6 @@
  * Documentation/ide/ChangeLog.ide-floppy.1996-2002
  */
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
-
-#define IDEFLOPPY_VERSION "1.00"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
@@ -49,19 +43,6 @@
 
 #include "ide-floppy.h"
 
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG	0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 /*
  * After each failed packet command we issue a request sense command and retry
  * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
@@ -83,41 +64,11 @@ module_param(debug_mask, ulong, 0644);
 /* Error code returned in rq->errors to the higher part of the driver. */
 #define	IDEFLOPPY_ERROR_GENERAL		101
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
-
-static void idefloppy_cleanup_obj(struct kref *);
-
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = NULL;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	floppy = ide_drv_g(disk, ide_floppy_obj);
-	if (floppy) {
-		if (ide_device_get(floppy->drive))
-			floppy = NULL;
-		else
-			kref_get(&floppy->kref);
-	}
-	mutex_unlock(&idefloppy_ref_mutex);
-	return floppy;
-}
-
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
-{
-	ide_drive_t *drive = floppy->drive;
-
-	mutex_lock(&idefloppy_ref_mutex);
-	kref_put(&floppy->kref, idefloppy_cleanup_obj);
-	ide_device_put(drive);
-	mutex_unlock(&idefloppy_ref_mutex);
-}
-
 /*
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
@@ -161,7 +112,7 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
 	struct bio *bio = rq->bio;
 
 	while ((bio = rq->bio) != NULL)
-		idefloppy_end_request(drive, 1, 0);
+		ide_floppy_end_request(drive, 1, 0);
 }
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
@@ -200,7 +151,7 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 			       "Aborting request!\n");
 	}
 
-	idefloppy_end_request(drive, uptodate, 0);
+	ide_floppy_end_request(drive, uptodate, 0);
 }
 
 static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
@@ -329,8 +280,8 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
-		struct request *rq, sector_t block_s)
+ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
+				      sector_t block_s)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -353,7 +304,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		else
 			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 	if (blk_fs_request(rq)) {
@@ -361,7 +312,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
 				drive->name);
-			idefloppy_end_request(drive, 0, 0);
+			ide_floppy_end_request(drive, 0, 0);
 			return ide_stopped;
 		}
 		pc = &floppy->queued_pc;
@@ -373,7 +324,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 		idefloppy_blockpc_cmd(floppy, pc, rq);
 	} else {
 		blk_dump_rq_flags(rq, PFX "unsupported command in queue");
-		idefloppy_end_request(drive, 0, 0);
+		ide_floppy_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
 
@@ -455,7 +406,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  * Determine if a media is present in the floppy drive, and if so, its LBA
  * capacity.
  */
-static int ide_floppy_get_capacity(ide_drive_t *drive)
+int ide_floppy_get_capacity(ide_drive_t *drive)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
@@ -554,12 +505,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	return rc;
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
-static void idefloppy_setup(ide_drive_t *drive)
+void ide_floppy_setup(ide_drive_t *drive)
 {
 	struct ide_floppy_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
@@ -601,248 +547,3 @@ static void idefloppy_setup(ide_drive_t *drive)
 
 	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
-
-static void ide_floppy_remove(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	struct gendisk *g = floppy->disk;
-
-	ide_proc_unregister_driver(drive, floppy->driver);
-
-	del_gendisk(g);
-
-	ide_floppy_put(floppy);
-}
-
-static void idefloppy_cleanup_obj(struct kref *kref)
-{
-	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	struct gendisk *g = floppy->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(floppy);
-}
-
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-floppy",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_floppy_probe,
-	.remove			= ide_floppy_remove,
-	.version		= IDEFLOPPY_VERSION,
-	.do_request		= idefloppy_do_request,
-	.end_request		= idefloppy_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_floppy_proc,
-	.settings		= ide_floppy_settings,
-#endif
-};
-
-static int idefloppy_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy;
-	ide_drive_t *drive;
-	int ret = 0;
-
-	floppy = ide_floppy_get(disk);
-	if (!floppy)
-		return -ENXIO;
-
-	drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	floppy->openers++;
-
-	if (floppy->openers == 1) {
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		if (ide_do_test_unit_ready(drive, disk))
-			ide_do_start_stop(drive, disk, 1);
-
-		ret = ide_floppy_get_capacity(drive);
-
-		set_capacity(disk, ide_floppy_capacity(drive));
-
-		if (ret && (filp->f_flags & O_NDELAY) == 0) {
-		    /*
-		     * Allow O_NDELAY to open a drive without a disk, or with an
-		     * unreadable disk, so that we can get the format capacity
-		     * of the drive or begin the format - Sam
-		     */
-			ret = -EIO;
-			goto out_put_floppy;
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
-			ret = -EROFS;
-			goto out_put_floppy;
-		}
-
-		ide_set_media_lock(drive, disk, 1);
-		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-		check_disk_change(inode->i_bdev);
-	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_floppy;
-	}
-	return 0;
-
-out_put_floppy:
-	floppy->openers--;
-	ide_floppy_put(floppy);
-	return ret;
-}
-
-static int idefloppy_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (floppy->openers == 1) {
-		ide_set_media_lock(drive, disk, 0);
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	floppy->openers--;
-
-	ide_floppy_put(floppy);
-
-	return 0;
-}
-
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int idefloppy_media_changed(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	int ret;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
-	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-	return ret;
-}
-
-static int idefloppy_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_floppy_capacity(floppy->drive));
-	return 0;
-}
-
-static struct block_device_operations idefloppy_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idefloppy_open,
-	.release		= idefloppy_release,
-	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= idefloppy_getgeo,
-	.media_changed		= idefloppy_media_changed,
-	.revalidate_disk	= idefloppy_revalidate_disk
-};
-
-static int ide_floppy_probe(ide_drive_t *drive)
-{
-	idefloppy_floppy_t *floppy;
-	struct gendisk *g;
-
-	if (!strstr("ide-floppy", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_floppy)
-		goto failed;
-
-	if (!ide_check_atapi_device(drive, DRV_NAME)) {
-		printk(KERN_ERR PFX "%s: not supported by this version of "
-		       DRV_NAME "\n", drive->name);
-		goto failed;
-	}
-	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-	if (!floppy) {
-		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-		       drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_floppy;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&floppy->kref);
-
-	floppy->drive = drive;
-	floppy->driver = &idefloppy_driver;
-	floppy->disk = g;
-
-	g->private_data = &floppy->driver;
-
-	drive->driver_data = floppy;
-
-	drive->debug_mask = debug_mask;
-
-	idefloppy_setup(drive);
-
-	set_capacity(g, ide_floppy_capacity(drive));
-
-	g->minors = 1 << PARTN_BITS;
-	g->driverfs_dev = &drive->gendev;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idefloppy_ops;
-	add_disk(g);
-	return 0;
-
-out_free_floppy:
-	kfree(floppy);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idefloppy_exit(void)
-{
-	driver_unregister(&idefloppy_driver.gen_driver);
-}
-
-static int __init idefloppy_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&idefloppy_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 17cf865e583d..836a70e6e3ef 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,6 +1,18 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
+#define DRV_NAME "ide-floppy"
+#define PFX DRV_NAME ": "
+
+/* define to see debug info */
+#define IDEFLOPPY_DEBUG_LOG	0
+
+#if IDEFLOPPY_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
 /*
  * Most of our global data which we need to save even as we leave the driver
  * due to an interrupt or a timer event is stored in a variable of type
@@ -45,10 +57,15 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
+sector_t ide_floppy_capacity(ide_drive_t *);
+
 /* ide-floppy.c */
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-sector_t ide_floppy_capacity(ide_drive_t *);
+int ide_floppy_get_capacity(ide_drive_t *);
+void ide_floppy_setup(ide_drive_t *);
+ide_startstop_t ide_floppy_do_request(ide_drive_t *, struct request *, sector_t);
+int ide_floppy_end_request(ide_drive_t *, int, int);
 
 /* ide-floppy_ioctl.c */
 int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
new file mode 100644
index 000000000000..7afd013b4c55
--- /dev/null
+++ b/drivers/ide/ide-gd-floppy.c
@@ -0,0 +1,298 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#include "ide-floppy.h"
+
+#define IDEFLOPPY_VERSION "1.00"
+
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
+static DEFINE_MUTEX(idefloppy_ref_mutex);
+
+static void idefloppy_cleanup_obj(struct kref *);
+
+static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = NULL;
+
+	mutex_lock(&idefloppy_ref_mutex);
+	floppy = ide_drv_g(disk, ide_floppy_obj);
+	if (floppy) {
+		if (ide_device_get(floppy->drive))
+			floppy = NULL;
+		else
+			kref_get(&floppy->kref);
+	}
+	mutex_unlock(&idefloppy_ref_mutex);
+	return floppy;
+}
+
+static void ide_floppy_put(struct ide_floppy_obj *floppy)
+{
+	ide_drive_t *drive = floppy->drive;
+
+	mutex_lock(&idefloppy_ref_mutex);
+	kref_put(&floppy->kref, idefloppy_cleanup_obj);
+	ide_device_put(drive);
+	mutex_unlock(&idefloppy_ref_mutex);
+}
+
+sector_t ide_floppy_capacity(ide_drive_t *drive)
+{
+	return drive->capacity64;
+}
+
+static void ide_floppy_remove(ide_drive_t *drive)
+{
+	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct gendisk *g = floppy->disk;
+
+	ide_proc_unregister_driver(drive, floppy->driver);
+
+	del_gendisk(g);
+
+	ide_floppy_put(floppy);
+}
+
+static void idefloppy_cleanup_obj(struct kref *kref)
+{
+	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+	struct gendisk *g = floppy->disk;
+
+	drive->driver_data = NULL;
+	g->private_data = NULL;
+	put_disk(g);
+	kfree(floppy);
+}
+
+static int ide_floppy_probe(ide_drive_t *);
+
+static ide_driver_t idefloppy_driver = {
+	.gen_driver = {
+		.owner		= THIS_MODULE,
+		.name		= "ide-floppy",
+		.bus		= &ide_bus_type,
+	},
+	.probe			= ide_floppy_probe,
+	.remove			= ide_floppy_remove,
+	.version		= IDEFLOPPY_VERSION,
+	.do_request		= ide_floppy_do_request,
+	.end_request		= ide_floppy_end_request,
+	.error			= __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+	.proc			= ide_floppy_proc,
+	.settings		= ide_floppy_settings,
+#endif
+};
+
+static int idefloppy_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_floppy_obj *floppy;
+	ide_drive_t *drive;
+	int ret = 0;
+
+	floppy = ide_floppy_get(disk);
+	if (!floppy)
+		return -ENXIO;
+
+	drive = floppy->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	floppy->openers++;
+
+	if (floppy->openers == 1) {
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+		/* Just in case */
+
+		if (ide_do_test_unit_ready(drive, disk))
+			ide_do_start_stop(drive, disk, 1);
+
+		ret = ide_floppy_get_capacity(drive);
+
+		set_capacity(disk, ide_floppy_capacity(drive));
+
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
+		    /*
+		     * Allow O_NDELAY to open a drive without a disk, or with an
+		     * unreadable disk, so that we can get the format capacity
+		     * of the drive or begin the format - Sam
+		     */
+			ret = -EIO;
+			goto out_put_floppy;
+		}
+
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+			ret = -EROFS;
+			goto out_put_floppy;
+		}
+
+		ide_set_media_lock(drive, disk, 1);
+		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+		check_disk_change(inode->i_bdev);
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+		ret = -EBUSY;
+		goto out_put_floppy;
+	}
+	return 0;
+
+out_put_floppy:
+	floppy->openers--;
+	ide_floppy_put(floppy);
+	return ret;
+}
+
+static int idefloppy_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+	if (floppy->openers == 1) {
+		ide_set_media_lock(drive, disk, 0);
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+	}
+
+	floppy->openers--;
+
+	ide_floppy_put(floppy);
+
+	return 0;
+}
+
+static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
+						     ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
+static int idefloppy_media_changed(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = floppy->drive;
+	int ret;
+
+	/* do not scan partitions twice if this is a removable device */
+	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+		return 0;
+	}
+
+	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+	return ret;
+}
+
+static int idefloppy_revalidate_disk(struct gendisk *disk)
+{
+	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
+	set_capacity(disk, ide_floppy_capacity(floppy->drive));
+	return 0;
+}
+
+static struct block_device_operations idefloppy_ops = {
+	.owner			= THIS_MODULE,
+	.open			= idefloppy_open,
+	.release		= idefloppy_release,
+	.ioctl			= ide_floppy_ioctl,
+	.getgeo			= idefloppy_getgeo,
+	.media_changed		= idefloppy_media_changed,
+	.revalidate_disk	= idefloppy_revalidate_disk
+};
+
+static int ide_floppy_probe(ide_drive_t *drive)
+{
+	idefloppy_floppy_t *floppy;
+	struct gendisk *g;
+
+	if (!strstr("ide-floppy", drive->driver_req))
+		goto failed;
+
+	if (drive->media != ide_floppy)
+		goto failed;
+
+	if (!ide_check_atapi_device(drive, DRV_NAME)) {
+		printk(KERN_ERR PFX "%s: not supported by this version of "
+		       DRV_NAME "\n", drive->name);
+		goto failed;
+	}
+	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
+	if (!floppy) {
+		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
+		       drive->name);
+		goto failed;
+	}
+
+	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
+	if (!g)
+		goto out_free_floppy;
+
+	ide_init_disk(g, drive);
+
+	kref_init(&floppy->kref);
+
+	floppy->drive = drive;
+	floppy->driver = &idefloppy_driver;
+	floppy->disk = g;
+
+	g->private_data = &floppy->driver;
+
+	drive->driver_data = floppy;
+
+	drive->debug_mask = debug_mask;
+
+	ide_floppy_setup(drive);
+
+	set_capacity(g, ide_floppy_capacity(drive));
+
+	g->minors = 1 << PARTN_BITS;
+	g->driverfs_dev = &drive->gendev;
+	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+		g->flags = GENHD_FL_REMOVABLE;
+	g->fops = &idefloppy_ops;
+	add_disk(g);
+	return 0;
+
+out_free_floppy:
+	kfree(floppy);
+failed:
+	return -ENODEV;
+}
+
+static int __init idefloppy_init(void)
+{
+	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
+	return driver_register(&idefloppy_driver.gen_driver);
+}
+
+static void __exit idefloppy_exit(void)
+{
+	driver_unregister(&idefloppy_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
+module_init(idefloppy_init);
+module_exit(idefloppy_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-- 
cgit v1.2.3-55-g7522


From 9a6eb74d07f9152dd0e0ea551e878e869b8d2fc1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: ide: prepare for merging ide-gd-floppy.c with ide-gd.c

- idefloppy_ref_mutex -> ide_disk_ref_mutex
- idefloppy_cleanup_obj() -> ide_disk_release()
- ide_floppy_get() -> ide_disk_get()
- ide_floppy_put() -> ide_disk_put()
- ide_floppy_capacity() -> ide_gd_capacity()
- ide_floppy_remove() -> ide_gd_remove()
- ide_floppy_probe() -> ide_gd_probe()
- idefloppy_driver -> ide_gd_driver
- idefloppy_open() -> ide_gd_open()
- idefloppy_release() -> ide_gd_release()
- idefloppy_getgeo() -> ide_gd_getgeo()
- idefloppy_media_changed() -> ide_gd_media_changed()
- idefloppy_revalidate_disk() -> ide_gd_revalidate_disk()
- idefloppy_ops -> ide_gd_ops
- idefloppy_init() -> ide_gd_init()
- idefloppy_exit() -> ide_gd_exit()

- 'floppy' -> 'idkp' in ide_disk_*() and ide_gd_*()
- idefloppy_floppy_t -> struct ide_floppy_obj

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.h      |   2 +-
 drivers/ide/ide-floppy_proc.c |   2 +-
 drivers/ide/ide-gd-floppy.c   | 179 +++++++++++++++++++++---------------------
 3 files changed, 91 insertions(+), 92 deletions(-)

diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 836a70e6e3ef..b965da2f41ce 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -57,7 +57,7 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
-sector_t ide_floppy_capacity(ide_drive_t *);
+sector_t ide_gd_capacity(ide_drive_t *);
 
 /* ide-floppy.c */
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 76f0c6c4eca3..3ec762cb60ab 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -9,7 +9,7 @@ static int proc_idefloppy_read_capacity(char *page, char **start, off_t off,
 	ide_drive_t*drive = (ide_drive_t *)data;
 	int len;
 
-	len = sprintf(page, "%llu\n", (long long)ide_floppy_capacity(drive));
+	len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 	PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
 
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
index 7afd013b4c55..986253418794 100644
--- a/drivers/ide/ide-gd-floppy.c
+++ b/drivers/ide/ide-gd-floppy.c
@@ -16,75 +16,75 @@
 static unsigned long debug_mask;
 module_param(debug_mask, ulong, 0644);
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
+static DEFINE_MUTEX(ide_disk_ref_mutex);
 
-static void idefloppy_cleanup_obj(struct kref *);
+static void ide_disk_release(struct kref *);
 
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
+static struct ide_floppy_obj *ide_disk_get(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = NULL;
+	struct ide_floppy_obj *idkp = NULL;
 
-	mutex_lock(&idefloppy_ref_mutex);
-	floppy = ide_drv_g(disk, ide_floppy_obj);
-	if (floppy) {
-		if (ide_device_get(floppy->drive))
-			floppy = NULL;
+	mutex_lock(&ide_disk_ref_mutex);
+	idkp = ide_drv_g(disk, ide_floppy_obj);
+	if (idkp) {
+		if (ide_device_get(idkp->drive))
+			idkp = NULL;
 		else
-			kref_get(&floppy->kref);
+			kref_get(&idkp->kref);
 	}
-	mutex_unlock(&idefloppy_ref_mutex);
-	return floppy;
+	mutex_unlock(&ide_disk_ref_mutex);
+	return idkp;
 }
 
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
+static void ide_disk_put(struct ide_floppy_obj *idkp)
 {
-	ide_drive_t *drive = floppy->drive;
+	ide_drive_t *drive = idkp->drive;
 
-	mutex_lock(&idefloppy_ref_mutex);
-	kref_put(&floppy->kref, idefloppy_cleanup_obj);
+	mutex_lock(&ide_disk_ref_mutex);
+	kref_put(&idkp->kref, ide_disk_release);
 	ide_device_put(drive);
-	mutex_unlock(&idefloppy_ref_mutex);
+	mutex_unlock(&ide_disk_ref_mutex);
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
+sector_t ide_gd_capacity(ide_drive_t *drive)
 {
 	return drive->capacity64;
 }
 
-static void ide_floppy_remove(ide_drive_t *drive)
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
-	struct gendisk *g = floppy->disk;
+	struct ide_floppy_obj *idkp = drive->driver_data;
+	struct gendisk *g = idkp->disk;
 
-	ide_proc_unregister_driver(drive, floppy->driver);
+	ide_proc_unregister_driver(drive, idkp->driver);
 
 	del_gendisk(g);
 
-	ide_floppy_put(floppy);
+	ide_disk_put(idkp);
 }
 
-static void idefloppy_cleanup_obj(struct kref *kref)
+static void ide_disk_release(struct kref *kref)
 {
-	struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
-	struct gendisk *g = floppy->disk;
+	struct ide_floppy_obj *idkp = to_ide_drv(kref, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
+	struct gendisk *g = idkp->disk;
 
 	drive->driver_data = NULL;
 	g->private_data = NULL;
 	put_disk(g);
-	kfree(floppy);
+	kfree(idkp);
 }
 
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
+static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-floppy",
 		.bus		= &ide_bus_type,
 	},
-	.probe			= ide_floppy_probe,
-	.remove			= ide_floppy_remove,
+	.probe			= ide_gd_probe,
+	.remove			= ide_gd_remove,
 	.version		= IDEFLOPPY_VERSION,
 	.do_request		= ide_floppy_do_request,
 	.end_request		= ide_floppy_end_request,
@@ -95,24 +95,24 @@ static ide_driver_t idefloppy_driver = {
 #endif
 };
 
-static int idefloppy_open(struct inode *inode, struct file *filp)
+static int ide_gd_open(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy;
+	struct ide_floppy_obj *idkp;
 	ide_drive_t *drive;
 	int ret = 0;
 
-	floppy = ide_floppy_get(disk);
-	if (!floppy)
+	idkp = ide_disk_get(disk);
+	if (idkp == NULL)
 		return -ENXIO;
 
-	drive = floppy->drive;
+	drive = idkp->drive;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
 
-	floppy->openers++;
+	idkp->openers++;
 
-	if (floppy->openers == 1) {
+	if (idkp->openers == 1) {
 		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 		/* Just in case */
 
@@ -121,7 +121,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 
 		ret = ide_floppy_get_capacity(drive);
 
-		set_capacity(disk, ide_floppy_capacity(drive));
+		set_capacity(disk, ide_gd_capacity(drive));
 
 		if (ret && (filp->f_flags & O_NDELAY) == 0) {
 		    /*
@@ -130,12 +130,12 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		     * of the drive or begin the format - Sam
 		     */
 			ret = -EIO;
-			goto out_put_floppy;
+			goto out_put_idkp;
 		}
 
 		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
 			ret = -EROFS;
-			goto out_put_floppy;
+			goto out_put_idkp;
 		}
 
 		ide_set_media_lock(drive, disk, 1);
@@ -143,41 +143,40 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
 		check_disk_change(inode->i_bdev);
 	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
 		ret = -EBUSY;
-		goto out_put_floppy;
+		goto out_put_idkp;
 	}
 	return 0;
 
-out_put_floppy:
-	floppy->openers--;
-	ide_floppy_put(floppy);
+out_put_idkp:
+	idkp->openers--;
+	ide_disk_put(idkp);
 	return ret;
 }
 
-static int idefloppy_release(struct inode *inode, struct file *filp)
+static int ide_gd_release(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
 
-	if (floppy->openers == 1) {
+	if (idkp->openers == 1) {
 		ide_set_media_lock(drive, disk, 0);
 		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
 	}
 
-	floppy->openers--;
+	idkp->openers--;
 
-	ide_floppy_put(floppy);
+	ide_disk_put(idkp);
 
 	return 0;
 }
 
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(bdev->bd_disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 
 	geo->heads = drive->bios_head;
 	geo->sectors = drive->bios_sect;
@@ -185,10 +184,10 @@ static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int idefloppy_media_changed(struct gendisk *disk)
+static int ide_gd_media_changed(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	ide_drive_t *drive = idkp->drive;
 	int ret;
 
 	/* do not scan partitions twice if this is a removable device */
@@ -203,26 +202,26 @@ static int idefloppy_media_changed(struct gendisk *disk)
 	return ret;
 }
 
-static int idefloppy_revalidate_disk(struct gendisk *disk)
+static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
-	struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_floppy_capacity(floppy->drive));
+	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
+	set_capacity(disk, ide_gd_capacity(idkp->drive));
 	return 0;
 }
 
-static struct block_device_operations idefloppy_ops = {
+static struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
-	.open			= idefloppy_open,
-	.release		= idefloppy_release,
+	.open			= ide_gd_open,
+	.release		= ide_gd_release,
 	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= idefloppy_getgeo,
-	.media_changed		= idefloppy_media_changed,
-	.revalidate_disk	= idefloppy_revalidate_disk
+	.getgeo			= ide_gd_getgeo,
+	.media_changed		= ide_gd_media_changed,
+	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
-static int ide_floppy_probe(ide_drive_t *drive)
+static int ide_gd_probe(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy;
+	struct ide_floppy_obj *idkp;
 	struct gendisk *g;
 
 	if (!strstr("ide-floppy", drive->driver_req))
@@ -236,8 +235,8 @@ static int ide_floppy_probe(ide_drive_t *drive)
 		       DRV_NAME "\n", drive->name);
 		goto failed;
 	}
-	floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-	if (!floppy) {
+	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+	if (!idkp) {
 		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
 		       drive->name);
 		goto failed;
@@ -245,54 +244,54 @@ static int ide_floppy_probe(ide_drive_t *drive)
 
 	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
 	if (!g)
-		goto out_free_floppy;
+		goto out_free_idkp;
 
 	ide_init_disk(g, drive);
 
-	kref_init(&floppy->kref);
+	kref_init(&idkp->kref);
 
-	floppy->drive = drive;
-	floppy->driver = &idefloppy_driver;
-	floppy->disk = g;
+	idkp->drive = drive;
+	idkp->driver = &ide_gd_driver;
+	idkp->disk = g;
 
-	g->private_data = &floppy->driver;
+	g->private_data = &idkp->driver;
 
-	drive->driver_data = floppy;
+	drive->driver_data = idkp;
 
 	drive->debug_mask = debug_mask;
 
 	ide_floppy_setup(drive);
 
-	set_capacity(g, ide_floppy_capacity(drive));
+	set_capacity(g, ide_gd_capacity(drive));
 
 	g->minors = 1 << PARTN_BITS;
 	g->driverfs_dev = &drive->gendev;
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &idefloppy_ops;
+	g->fops = &ide_gd_ops;
 	add_disk(g);
 	return 0;
 
-out_free_floppy:
-	kfree(floppy);
+out_free_idkp:
+	kfree(idkp);
 failed:
 	return -ENODEV;
 }
 
-static int __init idefloppy_init(void)
+static int __init ide_gd_init(void)
 {
 	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&idefloppy_driver.gen_driver);
+	return driver_register(&ide_gd_driver.gen_driver);
 }
 
-static void __exit idefloppy_exit(void)
+static void __exit ide_gd_exit(void)
 {
-	driver_unregister(&idefloppy_driver.gen_driver);
+	driver_unregister(&ide_gd_driver.gen_driver);
 }
 
 MODULE_ALIAS("ide:*m-floppy*");
 MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-- 
cgit v1.2.3-55-g7522


From 79cb380397c834a35952d8497651d93b543ef968 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:13 +0200
Subject: ide: allow device drivers to specify per-device type /proc settings

Turn ide_driver_t's 'proc' field into ->proc_entries method
(and also 'settings' field into ->proc_devsets method).  Then
update all device drivers accordingly.

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c        | 14 ++++++++++++--
 drivers/ide/ide-gd-floppy.c | 16 ++++++++++++++--
 drivers/ide/ide-gd.c        | 16 ++++++++++++++--
 drivers/ide/ide-proc.c      |  6 +++---
 drivers/ide/ide-tape.c      | 14 ++++++++++++--
 drivers/scsi/ide-scsi.c     | 26 +++++++++++++++++---------
 include/linux/ide.h         |  4 ++--
 7 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 2f4cc10391e5..32073666b9ca 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1908,6 +1908,16 @@ static const struct ide_proc_devset idecd_settings[] = {
 	IDE_PROC_DEVSET(dsc_overlap, 0, 1),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
+{
+	return idecd_proc;
+}
+
+static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
+{
+	return idecd_settings;
+}
 #endif
 
 static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -2069,8 +2079,8 @@ static ide_driver_t ide_cdrom_driver = {
 	.end_request		= ide_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idecd_proc,
-	.settings		= idecd_settings,
+	.proc_entries		= ide_cd_proc_entries,
+	.proc_devsets		= ide_cd_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
index 986253418794..082800b9a558 100644
--- a/drivers/ide/ide-gd-floppy.c
+++ b/drivers/ide/ide-gd-floppy.c
@@ -77,6 +77,18 @@ static void ide_disk_release(struct kref *kref)
 	kfree(idkp);
 }
 
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_floppy_proc_entries(ide_drive_t *drive)
+{
+	return ide_floppy_proc;
+}
+
+static const struct ide_proc_devset *ide_floppy_proc_devsets(ide_drive_t *drive)
+{
+	return ide_floppy_settings;
+}
+#endif
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -90,8 +102,8 @@ static ide_driver_t ide_gd_driver = {
 	.end_request		= ide_floppy_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_floppy_proc,
-	.settings		= ide_floppy_settings,
+	.proc_entries		= ide_floppy_proc_entries,
+	.proc_devsets		= ide_floppy_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index c08500270b9d..a3d4ad7db2af 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -119,6 +119,18 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
 }
 
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
+{
+	return ide_disk_proc;
+}
+
+static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
+{
+	return ide_disk_settings;
+}
+#endif
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -134,8 +146,8 @@ static ide_driver_t ide_gd_driver = {
 	.end_request		= ide_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= ide_disk_proc,
-	.settings		= ide_disk_settings,
+	.proc_entries		= ide_disk_proc_entries,
+	.proc_devsets		= ide_disk_proc_devsets,
 #endif
 };
 
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index b26926487cc0..c31d0dd7a532 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -567,10 +567,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
 void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	mutex_lock(&ide_setting_mtx);
-	drive->settings = driver->settings;
+	drive->settings = driver->proc_devsets(drive);
 	mutex_unlock(&ide_setting_mtx);
 
-	ide_add_proc_entries(drive->proc, driver->proc, drive);
+	ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
 }
 
 EXPORT_SYMBOL(ide_proc_register_driver);
@@ -591,7 +591,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
 	unsigned long flags;
 
-	ide_remove_proc_entries(drive->proc, driver->proc);
+	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
 	mutex_lock(&ide_setting_mtx);
 	spin_lock_irqsave(&ide_lock, flags);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a99e28f45156..b2b2e5e8d38e 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2298,6 +2298,16 @@ static ide_proc_entry_t idetape_proc[] = {
 	{ "name",	S_IFREG|S_IRUGO,	proc_idetape_read_name,	NULL },
 	{ NULL, 0, NULL, NULL }
 };
+
+static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
+{
+	return idetape_proc;
+}
+
+static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
+{
+	return idetape_settings;
+}
 #endif
 
 static int ide_tape_probe(ide_drive_t *);
@@ -2315,8 +2325,8 @@ static ide_driver_t idetape_driver = {
 	.end_request		= idetape_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idetape_proc,
-	.settings		= idetape_settings,
+	.proc_entries		= ide_tape_proc_entries,
+	.proc_devsets		= ide_tape_proc_devsets,
 #endif
 };
 
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 740bad435995..afc96e844a25 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -343,6 +343,11 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 }
 
 #ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t idescsi_proc[] = {
+	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+	{ NULL, 0, NULL, NULL }
+};
+
 #define ide_scsi_devset_get(name, field) \
 static int get_##name(ide_drive_t *drive) \
 { \
@@ -378,6 +383,16 @@ static const struct ide_proc_devset idescsi_settings[] = {
 	IDE_PROC_DEVSET(transform, 0,	 3),
 	{ 0 },
 };
+
+static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
+{
+	return idescsi_proc;
+}
+
+static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
+{
+	return idescsi_settings;
+}
 #endif
 
 /*
@@ -419,13 +434,6 @@ static void ide_scsi_remove(ide_drive_t *drive)
 
 static int ide_scsi_probe(ide_drive_t *);
 
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-	{ NULL, 0, NULL, NULL }
-};
-#endif
-
 static ide_driver_t idescsi_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
@@ -439,8 +447,8 @@ static ide_driver_t idescsi_driver = {
 	.end_request		= idescsi_end_request,
 	.error                  = idescsi_atapi_error,
 #ifdef CONFIG_IDE_PROC_FS
-	.proc			= idescsi_proc,
-	.settings		= idescsi_settings,
+	.proc_entries		= ide_scsi_proc_entries,
+	.proc_devsets		= ide_scsi_proc_devsets,
 #endif
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ba51a93fa547..488808891acb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1120,8 +1120,8 @@ struct ide_driver_s {
 	void		(*resume)(ide_drive_t *);
 	void		(*shutdown)(ide_drive_t *);
 #ifdef CONFIG_IDE_PROC_FS
-	ide_proc_entry_t		*proc;
-	const struct ide_proc_devset	*settings;
+	ide_proc_entry_t *		(*proc_entries)(ide_drive_t *);
+	const struct ide_proc_devset *	(*proc_devsets)(ide_drive_t *);
 #endif
 };
 
-- 
cgit v1.2.3-55-g7522


From 806f80a6fc203ad0bde84e5a9e94572617d2ae45 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: ide: add generic ATA/ATAPI disk driver

* Add struct ide_disk_ops containing protocol specific methods.

* Add 'struct ide_disk_ops *' to ide_drive_t.

* Convert ide-{disk,floppy} drivers to use struct ide_disk_ops.

* Merge ide-{disk,floppy} drivers into generic ide-gd driver.

While at it:
- ide_disk_init_capacity() -> ide_disk_get_capacity()

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig            |  64 ++++-----
 drivers/ide/Makefile           |  19 ++-
 drivers/ide/ide-disk.c         |  39 +++++-
 drivers/ide/ide-disk.h         |  24 ++--
 drivers/ide/ide-disk_ioctl.c   |   4 +-
 drivers/ide/ide-floppy.c       |  45 ++++--
 drivers/ide/ide-floppy.h       |  58 ++------
 drivers/ide/ide-floppy_ioctl.c |   9 +-
 drivers/ide/ide-gd-floppy.c    | 309 -----------------------------------------
 drivers/ide/ide-gd.c           | 122 +++++++++++++---
 drivers/ide/ide-gd.h           |  44 ++++++
 drivers/leds/Kconfig           |   2 +-
 include/linux/ide.h            |  19 +++
 13 files changed, 303 insertions(+), 455 deletions(-)
 delete mode 100644 drivers/ide/ide-gd-floppy.c
 create mode 100644 drivers/ide/ide-gd.h

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 74a369a6116f..faa974e615da 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -84,21 +84,40 @@ config BLK_DEV_IDE_SATA
 
 	  If unsure, say N.
 
-config BLK_DEV_IDEDISK
-	tristate "Include IDE/ATA-2 DISK support"
-	---help---
-	  This will include enhanced support for MFM/RLL/IDE hard disks.  If
-	  you have a MFM/RLL/IDE disk, and there is no special reason to use
-	  the old hard disk driver instead, say Y.  If you have an SCSI-only
-	  system, you can say N here.
+config IDE_GD
+	tristate "generic ATA/ATAPI disk support"
+	default y
+	help
+	  Support for ATA/ATAPI disks (including ATAPI floppy drives).
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-disk.
-	  Do not compile this driver as a module if your root file system
-	  (the one containing the directory /) is located on the IDE disk.
+	  To compile this driver as a module, choose M here.
+	  The module will be called ide-gd_mod.
 
 	  If unsure, say Y.
 
+config IDE_GD_ATA
+	bool "ATA disk support"
+	depends on IDE_GD
+	default y
+	help
+	  This will include support for ATA hard disks.
+
+	  If unsure, say Y.
+
+config IDE_GD_ATAPI
+	bool "ATAPI floppy support"
+	depends on IDE_GD
+	select IDE_ATAPI
+	help
+	  This will include support for ATAPI floppy drives
+	  (i.e. Iomega ZIP or MKE LS-120).
+
+	  For information about jumper settings and the question
+	  of when a ZIP drive uses a partition table, see
+	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
+
+	  If unsure, say N.
+
 config BLK_DEV_IDECS
 	tristate "PCMCIA IDE support"
 	depends on PCMCIA
@@ -163,29 +182,6 @@ config BLK_DEV_IDETAPE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ide-tape.
 
-config BLK_DEV_IDEFLOPPY
-	tristate "Include IDE/ATAPI FLOPPY support"
-	select IDE_ATAPI
-	---help---
-	  If you have an IDE floppy drive which uses the ATAPI protocol,
-	  answer Y.  ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
-	  drives, similar to the SCSI protocol.
-
-	  The LS-120 and the IDE/ATAPI Iomega ZIP drive are also supported by
-	  this driver. For information about jumper settings and the question
-	  of when a ZIP drive uses a partition table, see
-	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
-	  (ATAPI PD-CD/CDR drives are not supported by this driver; support
-	  for PD-CD/CDR drives is available if you answer Y to
-	  "SCSI emulation support", below).
-
-	  If you say Y here, the FLOPPY drive will be identified along with
-	  other IDE devices, as "hdb" or "hdc", or something similar (check
-	  the boot messages with dmesg).
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-floppy.
-
 config BLK_DEV_IDESCSI
 	tristate "SCSI emulation support (DEPRECATED)"
 	depends on SCSI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 7eeeab597959..093d3248ca89 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,18 +37,25 @@ obj-$(CONFIG_IDE_H8300)			+= h8300/
 obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
 
-ide-disk_mod-y += ide-gd.o ide-disk.o ide-disk_ioctl.o
+ide-gd_mod-y += ide-gd.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-gd-floppy.o ide-floppy.o ide-floppy_ioctl.o
 
+ifeq ($(CONFIG_IDE_GD_ATA), y)
+	ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
 ifeq ($(CONFIG_IDE_PROC_FS), y)
-	ide-disk_mod-y += ide-disk_proc.o
-	ide-floppy_mod-y += ide-floppy_proc.o
+	ide-gd_mod-y += ide-disk_proc.o
+endif
+endif
+
+ifeq ($(CONFIG_IDE_GD_ATAPI), y)
+	ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ifeq ($(CONFIG_IDE_PROC_FS), y)
+	ide-gd_mod-y += ide-floppy_proc.o
+endif
 endif
 
-obj-$(CONFIG_BLK_DEV_IDEDISK)		+= ide-disk_mod.o
+obj-$(CONFIG_IDE_GD)			+= ide-gd_mod.o
 obj-$(CONFIG_BLK_DEV_IDECD)		+= ide-cd_mod.o
-obj-$(CONFIG_BLK_DEV_IDEFLOPPY)		+= ide-floppy_mod.o
 obj-$(CONFIG_BLK_DEV_IDETAPE)		+= ide-tape.o
 
 ifeq ($(CONFIG_BLK_DEV_IDECS), y)
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 751be7af22c2..223750c1b5a6 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -184,8 +184,8 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
  * 1073741822 == 549756 MB or 48bit addressing fake drive
  */
 
-ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-			       sector_t block)
+static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
+				      sector_t block)
 {
 	ide_hwif_t *hwif = HWIF(drive);
 
@@ -333,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 	}
 }
 
-void ide_disk_init_capacity(ide_drive_t *drive)
+static int ide_disk_get_capacity(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int lba;
@@ -382,6 +382,8 @@ void ide_disk_init_capacity(ide_drive_t *drive)
 		} else
 			drive->dev_flags &= ~IDE_DFLAG_LBA48;
 	}
+
+	return 0;
 }
 
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
@@ -590,7 +592,12 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-void ide_disk_setup(ide_drive_t *drive)
+static int ide_disk_check(ide_drive_t *drive, const char *s)
+{
+	return 1;
+}
+
+static void ide_disk_setup(ide_drive_t *drive)
 {
 	struct ide_disk_obj *idkp = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
@@ -626,7 +633,7 @@ void ide_disk_setup(ide_drive_t *drive)
 			 drive->queue->max_sectors / 2);
 
 	/* calculate drive capacity, and select LBA if possible */
-	ide_disk_init_capacity(drive);
+	ide_disk_get_capacity(drive);
 
 	/*
 	 * if possible, give fdisk access to more of the drive,
@@ -682,7 +689,7 @@ void ide_disk_setup(ide_drive_t *drive)
 		drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-void ide_disk_flush(ide_drive_t *drive)
+static void ide_disk_flush(ide_drive_t *drive)
 {
 	if (ata_id_flush_enabled(drive->id) == 0 ||
 	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -692,7 +699,13 @@ void ide_disk_flush(ide_drive_t *drive)
 		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-int ide_disk_set_doorlock(ide_drive_t *drive, int on)
+static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
+{
+	return 0;
+}
+
+static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
+				 int on)
 {
 	ide_task_t task;
 	int ret;
@@ -711,3 +724,15 @@ int ide_disk_set_doorlock(ide_drive_t *drive, int on)
 
 	return ret;
 }
+
+const struct ide_disk_ops ide_ata_disk_ops = {
+	.check		= ide_disk_check,
+	.get_capacity	= ide_disk_get_capacity,
+	.setup		= ide_disk_setup,
+	.flush		= ide_disk_flush,
+	.init_media	= ide_disk_init_media,
+	.set_doorlock	= ide_disk_set_doorlock,
+	.do_request	= ide_do_rw_disk,
+	.end_request	= ide_end_request,
+	.ioctl		= ide_disk_ioctl,
+};
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index 104ad71288a5..b234b0feaf7b 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -1,22 +1,11 @@
 #ifndef __IDE_DISK_H
 #define __IDE_DISK_H
 
-struct ide_disk_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-};
-
-sector_t ide_gd_capacity(ide_drive_t *);
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATA
 /* ide-disk.c */
-void ide_disk_init_capacity(ide_drive_t *);
-void ide_disk_setup(ide_drive_t *);
-void ide_disk_flush(ide_drive_t *);
-int ide_disk_set_doorlock(ide_drive_t *, int);
-ide_startstop_t ide_do_rw_disk(ide_drive_t *, struct request *, sector_t);
+extern const struct ide_disk_ops ide_ata_disk_ops;
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
@@ -24,12 +13,17 @@ ide_decl_devset(wcache);
 ide_decl_devset(acoustic);
 
 /* ide-disk_ioctl.c */
-int ide_disk_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		   unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-disk_proc.c */
 extern ide_proc_entry_t ide_disk_proc[];
 extern const struct ide_proc_devset ide_disk_settings[];
 #endif
+#else
+#define ide_disk_proc		NULL
+#define ide_disk_settings	NULL
+#endif
 
 #endif /* __IDE_DISK_H */
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index e6624eda9e69..a49698bcf966 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -13,12 +13,10 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
 { 0 }
 };
 
-int ide_disk_ioctl(struct inode *inode, struct file *file,
+int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file,
 		   unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
 	int err;
 
 	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 802e0968e32f..58746c748c12 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -68,7 +68,7 @@
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
@@ -280,13 +280,12 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
-				      sector_t block_s)
+static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
+					     struct request *rq, sector_t block)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
 	struct ide_atapi_pc *pc;
-	unsigned long block = (unsigned long)block_s;
 
 	ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, "
 		      "errors: %d\n",
@@ -316,7 +315,7 @@ ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq,
 			return ide_stopped;
 		}
 		pc = &floppy->queued_pc;
-		idefloppy_create_rw_cmd(drive, pc, rq, block);
+		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
 	} else if (blk_special_request(rq)) {
 		pc = (struct ide_atapi_pc *) rq->buffer;
 	} else if (blk_pc_request(rq)) {
@@ -406,7 +405,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  * Determine if a media is present in the floppy drive, and if so, its LBA
  * capacity.
  */
-int ide_floppy_get_capacity(ide_drive_t *drive)
+static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
 	idefloppy_floppy_t *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
@@ -505,9 +504,9 @@ int ide_floppy_get_capacity(ide_drive_t *drive)
 	return rc;
 }
 
-void ide_floppy_setup(ide_drive_t *drive)
+static void ide_floppy_setup(ide_drive_t *drive)
 {
-	struct ide_floppy_obj *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	u16 *id = drive->id;
 
 	drive->pc_callback	 = ide_floppy_callback;
@@ -547,3 +546,33 @@ void ide_floppy_setup(ide_drive_t *drive)
 
 	drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
+
+static void ide_floppy_flush(ide_drive_t *drive)
+{
+}
+
+static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
+{
+	int ret = 0;
+
+	if (ide_do_test_unit_ready(drive, disk))
+		ide_do_start_stop(drive, disk, 1);
+
+	ret = ide_floppy_get_capacity(drive);
+
+	set_capacity(disk, ide_gd_capacity(drive));
+
+	return ret;
+}
+
+const struct ide_disk_ops ide_atapi_disk_ops = {
+	.check		= ide_check_atapi_device,
+	.get_capacity	= ide_floppy_get_capacity,
+	.setup		= ide_floppy_setup,
+	.flush		= ide_floppy_flush,
+	.init_media	= ide_floppy_init_media,
+	.set_doorlock	= ide_set_media_lock,
+	.do_request	= ide_floppy_do_request,
+	.end_request	= ide_floppy_end_request,
+	.ioctl		= ide_floppy_ioctl,
+};
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index b965da2f41ce..acebc8c5a827 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,48 +1,10 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
+#include "ide-gd.h"
 
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG	0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
-/*
- * Most of our global data which we need to save even as we leave the driver
- * due to an interrupt or a timer event is stored in a variable of type
- * idefloppy_floppy_t, defined below.
- */
-typedef struct ide_floppy_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
-
-	/* Last failed packet command */
-	struct ide_atapi_pc *failed_pc;
-	/* used for blk_{fs,pc}_request() requests */
-	struct ide_atapi_pc queued_pc;
-
-	/* Last error information */
-	u8 sense_key, asc, ascq;
-
-	int progress_indication;
-
-	/* Device information */
-	/* Current format */
-	int blocks, block_size, bs_factor;
-	/* Last format capacity descriptor */
-	u8 cap_desc[8];
-	/* Copy of the flexible disk page */
-	u8 flexible_disk_page[32];
-} idefloppy_floppy_t;
+#ifdef CONFIG_IDE_GD_ATAPI
+typedef struct ide_disk_obj idefloppy_floppy_t;
 
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
@@ -57,23 +19,23 @@ typedef struct ide_floppy_obj {
 #define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
 
-sector_t ide_gd_capacity(ide_drive_t *);
-
 /* ide-floppy.c */
+extern const struct ide_disk_ops ide_atapi_disk_ops;
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-int ide_floppy_get_capacity(ide_drive_t *);
-void ide_floppy_setup(ide_drive_t *);
-ide_startstop_t ide_floppy_do_request(ide_drive_t *, struct request *, sector_t);
-int ide_floppy_end_request(ide_drive_t *, int, int);
 
 /* ide-floppy_ioctl.c */
-int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
+int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+		     unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-floppy_proc.c */
 extern ide_proc_entry_t ide_floppy_proc[];
 extern const struct ide_proc_devset ide_floppy_settings[];
 #endif
+#else
+#define ide_floppy_proc		NULL
+#define ide_floppy_settings	NULL
+#endif
 
 #endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index b1f391df6cca..e8aa0a5bf5dc 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -33,7 +33,7 @@
 
 static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 {
-	struct ide_floppy_obj *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	u8 header_len, desc_cnt;
 	int i, blocks, length, u_array_size, u_index;
@@ -260,13 +260,10 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
 	}
 }
 
-int ide_floppy_ioctl(struct inode *inode, struct file *file,
-		    unsigned int cmd, unsigned long arg)
+int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode,
+		     struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct block_device *bdev = inode->i_bdev;
-	struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-						     ide_floppy_obj);
-	ide_drive_t *drive = floppy->drive;
 	struct ide_atapi_pc pc;
 	void __user *argp = (void __user *)arg;
 	int err;
diff --git a/drivers/ide/ide-gd-floppy.c b/drivers/ide/ide-gd-floppy.c
deleted file mode 100644
index 082800b9a558..000000000000
--- a/drivers/ide/ide-gd-floppy.c
+++ /dev/null
@@ -1,309 +0,0 @@
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/mutex.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-
-#include "ide-floppy.h"
-
-#define IDEFLOPPY_VERSION "1.00"
-
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-static DEFINE_MUTEX(ide_disk_ref_mutex);
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_floppy_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = NULL;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	idkp = ide_drv_g(disk, ide_floppy_obj);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			kref_get(&idkp->kref);
-	}
-	mutex_unlock(&ide_disk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_floppy_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	kref_put(&idkp->kref, ide_disk_release);
-	ide_device_put(drive);
-	mutex_unlock(&ide_disk_ref_mutex);
-}
-
-sector_t ide_gd_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
-static int ide_gd_probe(ide_drive_t *);
-
-static void ide_gd_remove(ide_drive_t *drive)
-{
-	struct ide_floppy_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-
-	del_gendisk(g);
-
-	ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-	struct ide_floppy_obj *idkp = to_ide_drv(kref, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t *ide_floppy_proc_entries(ide_drive_t *drive)
-{
-	return ide_floppy_proc;
-}
-
-static const struct ide_proc_devset *ide_floppy_proc_devsets(ide_drive_t *drive)
-{
-	return ide_floppy_settings;
-}
-#endif
-
-static ide_driver_t ide_gd_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-floppy",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_gd_probe,
-	.remove			= ide_gd_remove,
-	.version		= IDEFLOPPY_VERSION,
-	.do_request		= ide_floppy_do_request,
-	.end_request		= ide_floppy_end_request,
-	.error			= __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_floppy_proc_entries,
-	.proc_devsets		= ide_floppy_proc_devsets,
-#endif
-};
-
-static int ide_gd_open(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *idkp;
-	ide_drive_t *drive;
-	int ret = 0;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	idkp->openers++;
-
-	if (idkp->openers == 1) {
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		if (ide_do_test_unit_ready(drive, disk))
-			ide_do_start_stop(drive, disk, 1);
-
-		ret = ide_floppy_get_capacity(drive);
-
-		set_capacity(disk, ide_gd_capacity(drive));
-
-		if (ret && (filp->f_flags & O_NDELAY) == 0) {
-		    /*
-		     * Allow O_NDELAY to open a drive without a disk, or with an
-		     * unreadable disk, so that we can get the format capacity
-		     * of the drive or begin the format - Sam
-		     */
-			ret = -EIO;
-			goto out_put_idkp;
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
-			ret = -EROFS;
-			goto out_put_idkp;
-		}
-
-		ide_set_media_lock(drive, disk, 1);
-		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-		check_disk_change(inode->i_bdev);
-	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_idkp;
-	}
-	return 0;
-
-out_put_idkp:
-	idkp->openers--;
-	ide_disk_put(idkp);
-	return ret;
-}
-
-static int ide_gd_release(struct inode *inode, struct file *filp)
-{
-	struct gendisk *disk = inode->i_bdev->bd_disk;
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (idkp->openers == 1) {
-		ide_set_media_lock(drive, disk, 0);
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	idkp->openers--;
-
-	ide_disk_put(idkp);
-
-	return 0;
-}
-
-static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(bdev->bd_disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static int ide_gd_media_changed(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	ide_drive_t *drive = idkp->drive;
-	int ret;
-
-	/* do not scan partitions twice if this is a removable device */
-	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-		drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-		return 0;
-	}
-
-	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
-	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-
-	return ret;
-}
-
-static int ide_gd_revalidate_disk(struct gendisk *disk)
-{
-	struct ide_floppy_obj *idkp = ide_drv_g(disk, ide_floppy_obj);
-	set_capacity(disk, ide_gd_capacity(idkp->drive));
-	return 0;
-}
-
-static struct block_device_operations ide_gd_ops = {
-	.owner			= THIS_MODULE,
-	.open			= ide_gd_open,
-	.release		= ide_gd_release,
-	.ioctl			= ide_floppy_ioctl,
-	.getgeo			= ide_gd_getgeo,
-	.media_changed		= ide_gd_media_changed,
-	.revalidate_disk	= ide_gd_revalidate_disk
-};
-
-static int ide_gd_probe(ide_drive_t *drive)
-{
-	struct ide_floppy_obj *idkp;
-	struct gendisk *g;
-
-	if (!strstr("ide-floppy", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_floppy)
-		goto failed;
-
-	if (!ide_check_atapi_device(drive, DRV_NAME)) {
-		printk(KERN_ERR PFX "%s: not supported by this version of "
-		       DRV_NAME "\n", drive->name);
-		goto failed;
-	}
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp) {
-		printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-		       drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk_node(1 << PARTN_BITS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	kref_init(&idkp->kref);
-
-	idkp->drive = drive;
-	idkp->driver = &ide_gd_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-
-	drive->debug_mask = debug_mask;
-
-	ide_floppy_setup(drive);
-
-	set_capacity(g, ide_gd_capacity(drive));
-
-	g->minors = 1 << PARTN_BITS;
-	g->driverfs_dev = &drive->gendev;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &ide_gd_ops;
-	add_disk(g);
-	return 0;
-
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static int __init ide_gd_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-	return driver_register(&ide_gd_driver.gen_driver);
-}
-
-static void __exit ide_gd_exit(void)
-{
-	driver_unregister(&ide_gd_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(ide_gd_init);
-module_exit(ide_gd_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index a3d4ad7db2af..d44898f46c33 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -15,9 +15,14 @@
 #endif
 
 #include "ide-disk.h"
+#include "ide-floppy.h"
 
 #define IDE_GD_VERSION	"1.18"
 
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
 static DEFINE_MUTEX(ide_disk_ref_mutex);
 
 static void ide_disk_release(struct kref *);
@@ -64,7 +69,7 @@ static void ide_gd_remove(ide_drive_t *drive)
 
 	del_gendisk(g);
 
-	ide_disk_flush(drive);
+	drive->disk_ops->flush(drive);
 
 	ide_disk_put(idkp);
 }
@@ -75,6 +80,7 @@ static void ide_disk_release(struct kref *kref)
 	ide_drive_t *drive = idkp->drive;
 	struct gendisk *g = idkp->disk;
 
+	drive->disk_ops = NULL;
 	drive->driver_data = NULL;
 	g->private_data = NULL;
 	put_disk(g);
@@ -89,7 +95,7 @@ static void ide_disk_release(struct kref *kref)
 static void ide_gd_resume(ide_drive_t *drive)
 {
 	if (ata_id_hpa_enabled(drive->id))
-		ide_disk_init_capacity(drive);
+		(void)drive->disk_ops->get_capacity(drive);
 }
 
 static void ide_gd_shutdown(ide_drive_t *drive)
@@ -110,7 +116,7 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 #else
 	if (system_state == SYSTEM_RESTART) {
 #endif
-		ide_disk_flush(drive);
+		drive->disk_ops->flush(drive);
 		return;
 	}
 
@@ -122,19 +128,31 @@ static void ide_gd_shutdown(ide_drive_t *drive)
 #ifdef CONFIG_IDE_PROC_FS
 static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
 {
-	return ide_disk_proc;
+	return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
 }
 
 static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
 {
-	return ide_disk_settings;
+	return (drive->media == ide_disk) ? ide_disk_settings
+					  : ide_floppy_settings;
 }
 #endif
 
+static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
+					 struct request *rq, sector_t sector)
+{
+	return drive->disk_ops->do_request(drive, rq, sector);
+}
+
+static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
+{
+	return drive->disk_ops->end_request(drive, uptodate, nrsecs);
+}
+
 static ide_driver_t ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
-		.name		= "ide-disk",
+		.name		= "ide-gd",
 		.bus		= &ide_bus_type,
 	},
 	.probe			= ide_gd_probe,
@@ -142,8 +160,8 @@ static ide_driver_t ide_gd_driver = {
 	.resume			= ide_gd_resume,
 	.shutdown		= ide_gd_shutdown,
 	.version		= IDE_GD_VERSION,
-	.do_request		= ide_do_rw_disk,
-	.end_request		= ide_end_request,
+	.do_request		= ide_gd_do_request,
+	.end_request		= ide_gd_end_request,
 	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_disk_proc_entries,
@@ -156,6 +174,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 	struct gendisk *disk = inode->i_bdev->bd_disk;
 	struct ide_disk_obj *idkp;
 	ide_drive_t *drive;
+	int ret = 0;
 
 	idkp = ide_disk_get(disk);
 	if (idkp == NULL)
@@ -163,19 +182,49 @@ static int ide_gd_open(struct inode *inode, struct file *filp)
 
 	drive = idkp->drive;
 
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
 	idkp->openers++;
 
 	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+		/* Just in case */
+
+		ret = drive->disk_ops->init_media(drive, disk);
+
+		/*
+		 * Allow O_NDELAY to open a drive without a disk, or with an
+		 * unreadable disk, so that we can get the format capacity
+		 * of the drive or begin the format - Sam
+		 */
+		if (ret && (filp->f_flags & O_NDELAY) == 0) {
+			ret = -EIO;
+			goto out_put_idkp;
+		}
+
+		if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+			ret = -EROFS;
+			goto out_put_idkp;
+		}
+
 		/*
 		 * Ignore the return code from door_lock,
 		 * since the open() has already succeeded,
 		 * and the door_lock is irrelevant at this point.
 		 */
-		ide_disk_set_doorlock(drive, 1);
+		drive->disk_ops->set_doorlock(drive, disk, 1);
 		drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 		check_disk_change(inode->i_bdev);
+	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+		ret = -EBUSY;
+		goto out_put_idkp;
 	}
 	return 0;
+
+out_put_idkp:
+	idkp->openers--;
+	ide_disk_put(idkp);
+	return ret;
 }
 
 static int ide_gd_release(struct inode *inode, struct file *filp)
@@ -184,11 +233,15 @@ static int ide_gd_release(struct inode *inode, struct file *filp)
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
+	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
 	if (idkp->openers == 1)
-		ide_disk_flush(drive);
+		drive->disk_ops->flush(drive);
 
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1)
-		ide_disk_set_doorlock(drive, 0);
+	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+		drive->disk_ops->set_doorlock(drive, disk, 0);
+		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+	}
 
 	idkp->openers--;
 
@@ -233,11 +286,21 @@ static int ide_gd_revalidate_disk(struct gendisk *disk)
 	return 0;
 }
 
+static int ide_gd_ioctl(struct inode *inode, struct file *file,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+
+	return drive->disk_ops->ioctl(drive, inode, file, cmd, arg);
+}
+
 static struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= ide_gd_open,
 	.release		= ide_gd_release,
-	.ioctl			= ide_disk_ioctl,
+	.ioctl			= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
 	.revalidate_disk	= ide_gd_revalidate_disk
@@ -245,19 +308,37 @@ static struct block_device_operations ide_gd_ops = {
 
 static int ide_gd_probe(ide_drive_t *drive)
 {
+	const struct ide_disk_ops *disk_ops = NULL;
 	struct ide_disk_obj *idkp;
 	struct gendisk *g;
 
 	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-disk", drive->driver_req))
+	if (!strstr("ide-gd", drive->driver_req))
+		goto failed;
+
+#ifdef CONFIG_IDE_GD_ATA
+	if (drive->media == ide_disk)
+		disk_ops = &ide_ata_disk_ops;
+#endif
+#ifdef CONFIG_IDE_GD_ATAPI
+	if (drive->media == ide_floppy)
+		disk_ops = &ide_atapi_disk_ops;
+#endif
+	if (disk_ops == NULL)
 		goto failed;
 
-	if (drive->media != ide_disk)
+	if (disk_ops->check(drive, DRV_NAME) == 0) {
+		printk(KERN_ERR PFX "%s: not supported by this driver\n",
+			drive->name);
 		goto failed;
+	}
 
 	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp)
+	if (!idkp) {
+		printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
+			drive->name);
 		goto failed;
+	}
 
 	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
 	if (!g)
@@ -274,8 +355,10 @@ static int ide_gd_probe(ide_drive_t *drive)
 	g->private_data = &idkp->driver;
 
 	drive->driver_data = idkp;
+	drive->debug_mask = debug_mask;
+	drive->disk_ops = disk_ops;
 
-	ide_disk_setup(drive);
+	disk_ops->setup(drive);
 
 	set_capacity(g, ide_gd_capacity(drive));
 
@@ -296,6 +379,7 @@ failed:
 
 static int __init ide_gd_init(void)
 {
+	printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
 	return driver_register(&ide_gd_driver.gen_driver);
 }
 
@@ -306,7 +390,9 @@ static void __exit ide_gd_exit(void)
 
 MODULE_ALIAS("ide:*m-disk*");
 MODULE_ALIAS("ide-disk");
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
 module_init(ide_gd_init);
 module_exit(ide_gd_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATA DISK Driver");
+MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
new file mode 100644
index 000000000000..7d3d101713e0
--- /dev/null
+++ b/drivers/ide/ide-gd.h
@@ -0,0 +1,44 @@
+#ifndef __IDE_GD_H
+#define __IDE_GD_H
+
+#define DRV_NAME "ide-gd"
+#define PFX DRV_NAME ": "
+
+/* define to see debug info */
+#define IDE_GD_DEBUG_LOG	0
+
+#if IDE_GD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
+struct ide_disk_obj {
+	ide_drive_t	*drive;
+	ide_driver_t	*driver;
+	struct gendisk	*disk;
+	struct kref	kref;
+	unsigned int	openers;	/* protected by BKL for now */
+
+	/* Last failed packet command */
+	struct ide_atapi_pc *failed_pc;
+	/* used for blk_{fs,pc}_request() requests */
+	struct ide_atapi_pc queued_pc;
+
+	/* Last error information */
+	u8 sense_key, asc, ascq;
+
+	int progress_indication;
+
+	/* Device information */
+	/* Current format */
+	int blocks, block_size, bs_factor;
+	/* Last format capacity descriptor */
+	u8 cap_desc[8];
+	/* Copy of the flexible disk page */
+	u8 flexible_disk_page[32];
+};
+
+sector_t ide_gd_capacity(ide_drive_t *);
+
+#endif /* __IDE_GD_H */
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index e3e40427e00e..c7ff1e11ea85 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -179,7 +179,7 @@ config LEDS_TRIGGER_TIMER
 
 config LEDS_TRIGGER_IDE_DISK
 	bool "LED IDE Disk Trigger"
-	depends on LEDS_TRIGGERS && BLK_DEV_IDEDISK
+	depends on LEDS_TRIGGERS && IDE_GD_ATA
 	help
 	  This allows LEDs to be controlled by IDE disk activity.
 	  If unsure, say Y.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 488808891acb..89e53cfbc787 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -461,6 +461,23 @@ struct ide_acpi_drive_link;
 struct ide_acpi_hwif_link;
 #endif
 
+struct ide_drive_s;
+
+struct ide_disk_ops {
+	int		(*check)(struct ide_drive_s *, const char *);
+	int		(*get_capacity)(struct ide_drive_s *);
+	void		(*setup)(struct ide_drive_s *);
+	void		(*flush)(struct ide_drive_s *);
+	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
+	int		(*set_doorlock)(struct ide_drive_s *, struct gendisk *,
+					int);
+	ide_startstop_t	(*do_request)(struct ide_drive_s *, struct request *,
+				      sector_t);
+	int		(*end_request)(struct ide_drive_s *, int, int);
+	int		(*ioctl)(struct ide_drive_s *, struct inode *,
+				 struct file *, unsigned int, unsigned long);
+};
+
 /* ATAPI device flags */
 enum {
 	IDE_AFLAG_DRQ_INTERRUPT		= (1 << 0),
@@ -594,6 +611,8 @@ struct ide_drive_s {
 #endif
 	struct hwif_s		*hwif;	/* actually (ide_hwif_t *) */
 
+	const struct ide_disk_ops *disk_ops;
+
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
-- 
cgit v1.2.3-55-g7522


From 71b429ca4d5cb78a889128955a6ab891c5ab2a46 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: ide-cd: debug log enhancements

Add some more verbosity to key function calls in ide-cd debug code. While at it,
delete a superfluous comment.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 32073666b9ca..c36cab5785df 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -340,8 +340,8 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 	}
 
 	ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, "
-		      "rq->cmd_type: 0x%x, err: 0x%x\n", __func__, stat,
-		      good_stat, rq->cmd_type, err);
+		      "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x, err: 0x%x\n",
+		      __func__, stat, good_stat, rq->cmd[0], rq->cmd_type, err);
 
 	if (blk_sense_request(rq)) {
 		/*
@@ -849,7 +849,8 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
 
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+	ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
+		      __func__, rq->cmd[0]);
 
 	/*
 	 * Some of the trailing request sense fields are optional,
@@ -876,7 +877,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 	if (!sense)
 		sense = &local_sense;
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+	ide_debug_log(IDE_DBG_PC, "Call %s, cmd[0]: 0x%x, write: 0x%x, "
 		      "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write,
 		      timeout, cmd_flags);
 
@@ -1177,8 +1178,9 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	unsigned short sectors_per_frame =
 		queue_hardsect_size(drive->queue) >> SECTOR_BITS;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, write: 0x%x, secs_per_frame: %u\n",
-		      __func__, write, sectors_per_frame);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+		      "secs_per_frame: %u\n",
+		      __func__, rq->cmd[0], write, sectors_per_frame);
 
 	if (write) {
 		/* disk has become write protected */
@@ -1221,7 +1223,8 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
-	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd_type: 0x%x\n", __func__,
+	ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x\n", __func__, rq->cmd[0],
 		      rq->cmd_type);
 
 	if (blk_pc_request(rq))
@@ -1257,9 +1260,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	}
 }
 
-/*
- * cdrom driver request routine.
- */
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
@@ -1267,8 +1267,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 	ide_handler_t *fn;
 	int xferlen;
 
-	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd_type: 0x%x, block: %llu\n",
-		      __func__, rq->cmd_type, (unsigned long long)block);
+	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
+		      "rq->cmd_type: 0x%x, block: %llu\n",
+		      __func__, rq->cmd[0], rq->cmd_type,
+		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
 		if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
@@ -1412,6 +1414,10 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 
 	*capacity = 1 + be32_to_cpu(capbuf.lba);
 	*sectors_per_frame = blocklen >> SECTOR_BITS;
+
+	ide_debug_log(IDE_DBG_PROBE, "%s: cap: %lu, sectors_per_frame: %lu\n",
+		      __func__, *capacity, *sectors_per_frame);
+
 	return 0;
 }
 
@@ -1643,6 +1649,9 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
 		maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
 	}
 
+	ide_debug_log(IDE_DBG_PROBE, "%s: curspeed: %u, maxspeed: %u\n",
+		      __func__, curspeed, maxspeed);
+
 	cd->current_speed = (curspeed + (176/2)) / 176;
 	cd->max_speed = (maxspeed + (176/2)) / 176;
 }
-- 
cgit v1.2.3-55-g7522


From 419a5b67c3e901f8e6369d2630877a5f59692202 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: ide-cd: small drive type print fix

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index c36cab5785df..fd2971891321 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1786,7 +1786,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 	if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
 		printk(KERN_CONT " DVD%s%s",
 				 (cdi->mask & CDC_DVD_R) ? "" : "-R",
-				 (cdi->mask & CDC_DVD_RAM) ? "" : "-RAM");
+				 (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
 
 	if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
 		printk(KERN_CONT " CD%s%s",
-- 
cgit v1.2.3-55-g7522


From 2a2267e7b11fa2de95bfb707c85f2a9880e5206a Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: ide-cd: remove stale comment

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: split-up this change from a bigger patch]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index fd2971891321..13265a8827da 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -843,12 +843,8 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
 	rq->q->prep_rq_fn(rq->q, rq);
 }
 
-/*
- * All other packet commands.
- */
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
-
 	ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
 		      __func__, rq->cmd[0]);
 
-- 
cgit v1.2.3-55-g7522


From 30c7ed5aba72bb7357282cf8f411b2e74d82081c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:14 +0200
Subject: ide: fix support for IDE PCI controllers using MMIO on frv

Just include <asm-generic/ide_iops.h> for __ide_mm_*() instead of
defining them to normal I/O helpers so PCI bus <-> CPU byte-swapping
is done as needed.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-frv/ide.h | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 7ebcc56a2229..361076611855 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,15 +18,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-/****************************************************************************/
-/*
- * some bits needed for parts of the IDE subsystem to compile
- */
-#define __ide_mm_insw(port, addr, n)	insw((unsigned long) (port), addr, n)
-#define __ide_mm_insl(port, addr, n)	insl((unsigned long) (port), addr, n)
-#define __ide_mm_outsw(port, addr, n)	outsw((unsigned long) (port), addr, n)
-#define __ide_mm_outsl(port, addr, n)	outsl((unsigned long) (port), addr, n)
-
+#include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IDE_H */
-- 
cgit v1.2.3-55-g7522


From c36167de652f8acdf0b374c78805e662646cd327 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: ide: remove dead <asm-arm/arch-sa1100/ide.h>

It has been dead for more than 3 years and needs to be converted
to use platform PATA support anyway.

Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 arch/arm/mach-sa1100/include/mach/ide.h | 75 ---------------------------------
 1 file changed, 75 deletions(-)
 delete mode 100644 arch/arm/mach-sa1100/include/mach/ide.h

diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644
index 4c99c8f5e617..000000000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-				       unsigned long ctrl_port, int *irq)
-{
-	unsigned long reg = data_port;
-	int i;
-	int regincr = 1;
-
-	/* The Empeg board has the first two address lines unused */
-	if (machine_is_empeg())
-		regincr = 1 << 2;
-
-	/* The LART doesn't use A0 for IDE */
-	if (machine_is_lart())
-		regincr = 1 << 1;
-
-	memset(hw, 0, sizeof(*hw));
-
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = reg;
-		reg += regincr;
-	}
-
-	hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq)
-		*irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-    if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-        hw_regs_t hw;
-
-        /* Enable GPIO as interrupt line */
-        GPDR &= ~LART_GPIO_IDE;
-	set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-        /* set PCMCIA interface timing */
-        MECR = 0x00060006;
-
-        /* init the interface */
-	ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-        hw.irq = LART_IRQ_IDE;
-        ide_register_hw(&hw);
-#endif
-    }
-}
-- 
cgit v1.2.3-55-g7522


From dc10f6119636a5ff13b633e722e720fe0253202b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: ide: remove M68K_IDE_SWAPW define from <asm-m68k/ide.h>

Since we solved this by overriding default ->{in,out}put_data
methods in {q40,falcon}ide M68K_IDE_SWAP define can go away.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-m68k/ide.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 1daf6cbdd9f0..b996a3c8cff5 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -92,15 +92,6 @@
 #define outsw_swapw(port, addr, n)	raw_outsw_swapw((u16 *)port, addr, n)
 #endif
 
-
-/* Q40 and Atari have byteswapped IDE busses and since many interesting
- * values in the identification string are text, chars and words they
- * happened to be almost correct without swapping.. However *_capacity
- * is needed for drives over 8 GB. RZ */
-#if defined(CONFIG_Q40) || defined(CONFIG_ATARI)
-#define M68K_IDE_SWAPW  (MACH_IS_Q40 || MACH_IS_ATARI)
-#endif
-
 #ifdef CONFIG_BLK_DEV_FALCON_IDE
 #define IDE_ARCH_LOCK
 
-- 
cgit v1.2.3-55-g7522


From 21f45eb1d381aad30094751c60ae8171d6223a66 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: ide: remove unused macros from <asm-parisc/ide.h>

Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-parisc/ide.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index c246ef75017d..81700a2321cf 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
-#define ide_release_region(from,extent)		release_region((from), (extent))
 /* Generic I/O and MEMIO string operations.  */
 
 #define __ide_insw	insw
-- 
cgit v1.2.3-55-g7522


From 79104c687ca29e214142d8e8f30964be05e1276f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: hpt366: fix compile warning

Fixup for commit 1785192b5310ee25165768f5bb80f13146788e3e
("hpt366: add hpt3xx_disable_fast_irq() helper"):

   CC      drivers/ide/pci/hpt366.o
drivers/ide/pci/hpt366.c: In function `init_hwif_hpt366':
drivers/ide/pci/hpt366.c:1290: warning: unused variable `dev'

Reported-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 9cf171cb9376..ca0acb50bc61 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1289,7 +1289,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
-- 
cgit v1.2.3-55-g7522


From e5403bff8a4018240f012fd4c7afa24c2e75b469 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: ide: mask interrupt in ide_config_drive_speed()

Apparently, there is no sense in unmasking IRQ on the controller when you call
disable_irq_nosync() before doing this, set the nIEN bit afterwards, and then
unmask IRQ again after the command completion, hence 0 passed to SELECT_MASK()
before issuing the command in ide_config_drive_speed() is probably just a typo.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-iops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index b762deb2dacb..bb7a1ed8094e 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -755,7 +755,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 	 
 	udelay(1);
 	SELECT_DRIVE(drive);
-	SELECT_MASK(drive, 0);
+	SELECT_MASK(drive, 1);
 	udelay(1);
 	tp_ops->set_irq(hwif, 0);
 
-- 
cgit v1.2.3-55-g7522


From ea2ac5a3b7d33ff9f41ddcee2a92c95b5a32f4e2 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:15 +0200
Subject: hpt366: cleanup maskproc() method

Since the maskproc() method calls either mirror the interrupt en/disable via
the nIEN bit of the device control register done by the IDE core before issuing
a command or unmask the interrupt after a command executed in polled mode (when
interrupt is already not expected), it is pointless to manipulate the nIEN bit
in this method; therefore, just do nothing for the drives not on the quirk list.
Move the code to the left by using an early return and the 'else if' construct,
while at it....

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
[bart: fix checkpatch.pl warning]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/hpt366.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index ca0acb50bc61..a7909e9c720e 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -3,7 +3,7 @@
  * Portions Copyright (C) 2001	        Sun Microsystems, Inc.
  * Portions Copyright (C) 2003		Red Hat Inc
  * Portions Copyright (C) 2007		Bartlomiej Zolnierkiewicz
- * Portions Copyright (C) 2005-2007	MontaVista Software, Inc.
+ * Portions Copyright (C) 2005-2008	MontaVista Software, Inc.
  *
  * Thanks to HighPoint Technologies for their assistance, and hardware.
  * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
@@ -748,26 +748,24 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list) {
-		if (info->chip_type >= HPT370) {
-			u8 scr1 = 0;
-
-			pci_read_config_byte(dev, 0x5a, &scr1);
-			if (((scr1 & 0x10) >> 4) != mask) {
-				if (mask)
-					scr1 |=  0x10;
-				else
-					scr1 &= ~0x10;
-				pci_write_config_byte(dev, 0x5a, scr1);
-			}
-		} else {
+	if (drive->quirk_list == 0)
+		return;
+
+	if (info->chip_type >= HPT370) {
+		u8 scr1 = 0;
+
+		pci_read_config_byte(dev, 0x5a, &scr1);
+		if (((scr1 & 0x10) >> 4) != mask) {
 			if (mask)
-				disable_irq(hwif->irq);
+				scr1 |=  0x10;
 			else
-				enable_irq (hwif->irq);
+				scr1 &= ~0x10;
+			pci_write_config_byte(dev, 0x5a, scr1);
 		}
-	} else
-		outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
+	} else if (mask)
+		disable_irq(hwif->irq);
+	else
+		enable_irq(hwif->irq);
 }
 
 /*
-- 
cgit v1.2.3-55-g7522


From ea656980f40d599400d2306b23f2fbc707ae7313 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: sgiioc4: remove maskproc() method

Since the maskproc() method calls either mirror the interrupt en/disable via
the nIEN bit of the device control register done by the IDE core before issuing
a command or unmask the interrupt after a command executed in polled mode (when
interrupt is already not expected), it is pointless to implement this method
by manipulating the nIEN bit...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index dd634541ce36..8581789352aa 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -108,13 +108,6 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 		hw->io_ports.irq_addr = irq_port;
 }
 
-static void
-sgiioc4_maskproc(ide_drive_t * drive, int mask)
-{
-	writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
-	       (void __iomem *)drive->hwif->io_ports.ctl_addr);
-}
-
 static int
 sgiioc4_checkirq(ide_hwif_t * hwif)
 {
@@ -563,8 +556,6 @@ static const struct ide_port_ops sgiioc4_port_ops = {
 	.set_dma_mode		= sgiioc4_set_dma_mode,
 	/* reset DMA engine, clear IRQs */
 	.resetproc		= sgiioc4_resetproc,
-	/* mask on/off NIEN register */
-	.maskproc		= sgiioc4_maskproc,
 };
 
 static const struct ide_dma_ops sgiioc4_dma_ops = {
-- 
cgit v1.2.3-55-g7522


From 0df962777b550a4b67191b3ee2555be139da4e7d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: ide-floppy: remove idefloppy_floppy_t typedef

There should be no functional changes caused by this patch.

Acked-by: Borislav Petkov <petkovbb@gmail.com>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-floppy.c       | 18 +++++++++---------
 drivers/ide/ide-floppy.h       |  2 --
 drivers/ide/ide-floppy_ioctl.c |  8 ++++----
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 58746c748c12..aeb1ad782f54 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -70,7 +70,7 @@
  */
 static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct request *rq = HWGROUP(drive)->rq;
 	int error;
 
@@ -117,7 +117,7 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc *pc = drive->pc;
 	int uptodate = pc->error ? 0 : 1;
 
@@ -154,7 +154,7 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 	ide_floppy_end_request(drive, uptodate, 0);
 }
 
-static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
+static void ide_floppy_report_error(struct ide_disk_obj *floppy,
 				    struct ide_atapi_pc *pc)
 {
 	/* supress error messages resulting from Medium not present */
@@ -173,7 +173,7 @@ static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
 static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 		struct ide_atapi_pc *pc)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 
 	if (floppy->failed_pc == NULL &&
 	    pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -237,7 +237,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 				    struct ide_atapi_pc *pc, struct request *rq,
 				    unsigned long sector)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	int block = sector / floppy->bs_factor;
 	int blocks = rq->nr_sectors / floppy->bs_factor;
 	int cmd = rq_data_dir(rq);
@@ -261,7 +261,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 	pc->flags |= PC_FLAG_DMA_OK;
 }
 
-static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
+static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 		struct ide_atapi_pc *pc, struct request *rq)
 {
 	ide_init_pc(pc);
@@ -283,7 +283,7 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
 static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 					     struct request *rq, sector_t block)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	ide_hwif_t *hwif = drive->hwif;
 	struct ide_atapi_pc *pc;
 
@@ -344,7 +344,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
  */
 static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *page;
@@ -407,7 +407,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  */
 static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	struct ide_atapi_pc pc;
 	u8 *cap_desc;
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index acebc8c5a827..c17124dd6079 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -4,8 +4,6 @@
 #include "ide-gd.h"
 
 #ifdef CONFIG_IDE_GD_ATAPI
-typedef struct ide_disk_obj idefloppy_floppy_t;
-
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
  * See SFF-8070i spec.
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index e8aa0a5bf5dc..409e4c15f9b7 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -113,7 +113,7 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
 
 static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 
 	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
@@ -132,7 +132,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 
 static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int blocks, length, flags, err = 0;
 
@@ -190,7 +190,7 @@ out:
 
 static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct ide_atapi_pc pc;
 	int progress_indication = 0x10000;
 
@@ -226,7 +226,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
 			       unsigned long arg, unsigned int cmd)
 {
-	idefloppy_floppy_t *floppy = drive->driver_data;
+	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
 	int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
 
-- 
cgit v1.2.3-55-g7522


From 41d1a3d31d097a31380b83eea0ec10ea1d040376 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: ide: remove broken hpt34x driver

No big loss since HPT343/363 controllers are properly supported
by pata_hpt3x3 driver from Alan Cox.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig      |  24 +-----
 drivers/ide/pci/Makefile |   1 -
 drivers/ide/pci/hpt34x.c | 193 -----------------------------------------------
 3 files changed, 1 insertion(+), 217 deletions(-)
 delete mode 100644 drivers/ide/pci/hpt34x.c

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index faa974e615da..a820ca6fc327 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -328,7 +328,7 @@ config IDEPCI_PCIBUS_ORDER
 # TODO: split it on per host driver config options (or module parameters)
 config BLK_DEV_OFFBOARD
 	bool "Boot off-board chipsets first support (DEPRECATED)"
-	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT34X || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
+	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
 	help
 	  Normally, IDE controllers built into the motherboard (on-board
 	  controllers) are assigned to ide0 and ide1 while those on add-in PCI
@@ -478,28 +478,6 @@ config BLK_DEV_CS5535
 
 	  It is safe to say Y to this question.
 
-config BLK_DEV_HPT34X
-	tristate "HPT34X chipset support"
-	depends on BROKEN
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds up to 4 more EIDE devices sharing a single
-	  interrupt. The HPT343 chipset in its current form is a non-bootable
-	  controller; the HPT345/HPT363 chipset is a bootable (needs BIOS FIX)
-	  PCI UDMA controllers. This driver requires dynamic tuning of the
-	  chipset during the ide-probe at boot time. It is reported to support
-	  DVD II drives, by the manufacturer.
-
-config HPT34X_AUTODMA
-	bool "HPT34X AUTODMA support (EXPERIMENTAL)"
-	depends on BLK_DEV_HPT34X && EXPERIMENTAL
-	help
-	  This is a dangerous thing to attempt currently! Please read the
-	  comments at the top of <file:drivers/ide/pci/hpt34x.c>.  If you say Y
-	  here, then say Y to "Use DMA by default when available" as well.
-
-	  If unsure, say N.
-
 config BLK_DEV_HPT366
 	tristate "HPT36X/37X chipset support"
 	select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/pci/Makefile b/drivers/ide/pci/Makefile
index 02e6ee7d751d..ab44a1f5f5a9 100644
--- a/drivers/ide/pci/Makefile
+++ b/drivers/ide/pci/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_BLK_DEV_CS5535)		+= cs5535.o
 obj-$(CONFIG_BLK_DEV_SC1200)		+= sc1200.o
 obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
-obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
deleted file mode 100644
index fb1a3aa57f07..000000000000
--- a/drivers/ide/pci/hpt34x.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 1998-2000	Andre Hedrick <andre@linux-ide.org>
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- *
- * 00:12.0 Unknown mass storage controller:
- * Triones Technologies, Inc.
- * Unknown device 0003 (rev 01)
- *
- * hde: UDMA 2 (0x0000 0x0002) (0x0000 0x0010)
- * hdf: UDMA 2 (0x0002 0x0012) (0x0010 0x0030)
- * hde: DMA 2  (0x0000 0x0002) (0x0000 0x0010)
- * hdf: DMA 2  (0x0002 0x0012) (0x0010 0x0030)
- * hdg: DMA 1  (0x0012 0x0052) (0x0030 0x0070)
- * hdh: DMA 1  (0x0052 0x0252) (0x0070 0x00f0)
- *
- * ide-pci.c reference
- *
- * Since there are two cards that report almost identically,
- * the only discernable difference is the values reported in pcicmd.
- * Booting-BIOS card or HPT363 :: pcicmd == 0x07
- * Non-bootable card or HPT343 :: pcicmd == 0x05
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "hpt34x"
-
-#define HPT343_DEBUG_DRIVE_INFO		0
-
-static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 reg1= 0, tmp1 = 0, reg2 = 0, tmp2 = 0;
-	u8			hi_speed, lo_speed;
-
-	hi_speed = speed >> 4;
-	lo_speed = speed & 0x0f;
-
-	if (hi_speed & 7) {
-		hi_speed = (hi_speed & 4) ? 0x01 : 0x10;
-	} else {
-		lo_speed <<= 5;
-		lo_speed >>= 5;
-	}
-
-	pci_read_config_dword(dev, 0x44, &reg1);
-	pci_read_config_dword(dev, 0x48, &reg2);
-	tmp1 = ((lo_speed << (3*drive->dn)) | (reg1 & ~(7 << (3*drive->dn))));
-	tmp2 = ((hi_speed << drive->dn) | (reg2 & ~(0x11 << drive->dn)));
-	pci_write_config_dword(dev, 0x44, tmp1);
-	pci_write_config_dword(dev, 0x48, tmp2);
-
-#if HPT343_DEBUG_DRIVE_INFO
-	printk("%s: %s drive%d (0x%04x 0x%04x) (0x%04x 0x%04x)" \
-		" (0x%02x 0x%02x)\n",
-		drive->name, ide_xfer_verbose(speed),
-		drive->dn, reg1, tmp1, reg2, tmp2,
-		hi_speed, lo_speed);
-#endif /* HPT343_DEBUG_DRIVE_INFO */
-}
-
-static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
-{
-	hpt34x_set_mode(drive, XFER_PIO_0 + pio);
-}
-
-/*
- * If the BIOS does not set the IO base addaress to XX00, 343 will fail.
- */
-#define	HPT34X_PCI_INIT_REG		0x80
-
-static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
-{
-	int i = 0;
-	unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
-	unsigned long hpt_addr[4] = { 0x20, 0x34, 0x28, 0x3c };
-	unsigned long hpt_addr_len[4] = { 7, 3, 7, 3 };
-	u16 cmd;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	pci_write_config_byte(dev, HPT34X_PCI_INIT_REG, 0x00);
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
-	if (cmd & PCI_COMMAND_MEMORY)
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0);
-	else
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
-
-	/*
-	 * Since 20-23 can be assigned and are R/W, we correct them.
-	 */
-	pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
-	for(i=0; i<4; i++) {
-		dev->resource[i].start = (hpt34xIoBase + hpt_addr[i]);
-		dev->resource[i].end = dev->resource[i].start + hpt_addr_len[i];
-		dev->resource[i].flags = IORESOURCE_IO;
-		pci_write_config_dword(dev,
-				(PCI_BASE_ADDRESS_0 + (i * 4)),
-				dev->resource[i].start);
-	}
-	pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-	local_irq_restore(flags);
-
-	return dev->irq;
-}
-
-static const struct ide_port_ops hpt34x_port_ops = {
-	.set_pio_mode		= hpt34x_set_pio_mode,
-	.set_dma_mode		= hpt34x_set_mode,
-};
-
-#define IDE_HFLAGS_HPT34X \
-	(IDE_HFLAG_NO_ATAPI_DMA | \
-	 IDE_HFLAG_NO_DSC | \
-	 IDE_HFLAG_NO_AUTODMA)
-
-static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
-	{ /* 0: HPT343 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-	},
-	{ /* 1: HPT345 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt34x,
-		.port_ops	= &hpt34x_port_ops,
-		.host_flags	= IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO5,
-#ifdef CONFIG_HPT34X_AUTODMA
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-#endif
-	}
-};
-
-static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d;
-	u16 pcicmd = 0;
-
-	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-
-	d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
-
-	return ide_pci_init_one(dev, d, NULL);
-}
-
-static const struct pci_device_id hpt34x_pci_tbl[] = {
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT343), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, hpt34x_pci_tbl);
-
-static struct pci_driver hpt34x_pci_driver = {
-	.name		= "HPT34x_IDE",
-	.id_table	= hpt34x_pci_tbl,
-	.probe		= hpt34x_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init hpt34x_ide_init(void)
-{
-	return ide_pci_register_driver(&hpt34x_pci_driver);
-}
-
-static void __exit hpt34x_ide_exit(void)
-{
-	pci_unregister_driver(&hpt34x_pci_driver);
-}
-
-module_init(hpt34x_ide_init);
-module_exit(hpt34x_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
-MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-55-g7522


From 8c061a40c293660793dab83b75223e6eaaa04f8b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: delkin_cb: add PM support

* Factor out chipset initialization code from delkin_cb_probe()
  to delkin_cb_init_chipset().

* Assign ->init_chipset in struct delkin_cb_port_info.

* Add delkin_cb_{suspend,resume}() (->suspend/->resume methods).

Fixes kernel bugzilla bug #11735:

http://bugzilla.kernel.org/show_bug.cgi?id=11735

Tested-by: bumble.bee@xs4all.nl
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/delkin_cb.c | 63 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 8689a706f537..8f1b2d9f0513 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -46,10 +46,27 @@ static const struct ide_port_ops delkin_cb_port_ops = {
 	.quirkproc		= ide_undecoded_slave,
 };
 
+static unsigned int delkin_cb_init_chipset(struct pci_dev *dev)
+{
+	unsigned long base = pci_resource_start(dev, 0);
+	int i;
+
+	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
+	inb(base + 0x17);		/* read status to clear interrupts */
+
+	for (i = 0; i < sizeof(setup); ++i) {
+		if (setup[i])
+			outb(setup[i], base + i);
+	}
+
+	return 0;
+}
+
 static const struct ide_port_info delkin_cb_port_info = {
 	.port_ops		= &delkin_cb_port_ops,
 	.host_flags		= IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
 				  IDE_HFLAG_NO_DMA,
+	.init_chipset		= delkin_cb_init_chipset,
 };
 
 static int __devinit
@@ -57,7 +74,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct ide_host *host;
 	unsigned long base;
-	int i, rc;
+	int rc;
 	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
 	rc = pci_enable_device(dev);
@@ -72,12 +89,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 		return rc;
 	}
 	base = pci_resource_start(dev, 0);
-	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
-	inb(base + 0x17);		/* read status to clear interrupts */
-	for (i = 0; i < sizeof(setup); ++i) {
-		if (setup[i])
-			outb(setup[i], base + i);
-	}
+
+	delkin_cb_init_chipset(dev);
 
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
@@ -110,6 +123,40 @@ delkin_cb_remove (struct pci_dev *dev)
 	pci_disable_device(dev);
 }
 
+#ifdef CONFIG_PM
+static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	pci_save_state(dev);
+	pci_disable_device(dev);
+	pci_set_power_state(dev, pci_choose_state(dev, state));
+
+	return 0;
+}
+
+static int delkin_cb_resume(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	int rc;
+
+	pci_set_power_state(dev, PCI_D0);
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_restore_state(dev);
+	pci_set_master(dev);
+
+	if (host->init_chipset)
+		host->init_chipset(dev);
+
+	return 0;
+}
+#else
+#define delkin_cb_suspend NULL
+#define delkin_cb_resume NULL
+#endif
+
 static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = {
 	{ 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{ 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
@@ -122,6 +169,8 @@ static struct pci_driver delkin_cb_pci_driver = {
 	.id_table	= delkin_cb_pci_tbl,
 	.probe		= delkin_cb_probe,
 	.remove		= delkin_cb_remove,
+	.suspend	= delkin_cb_suspend,
+	.resume		= delkin_cb_resume,
 };
 
 static int __init delkin_cb_init(void)
-- 
cgit v1.2.3-55-g7522


From 8108b882329db7fbf9fbca6559aa36e6174dc91f Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:16 +0200
Subject: sgiioc4: kill useless address checks

The driver performs a number of checks on the virtual/physical addresses which
would always evaluate as true (except ide_dma_sgiioc4() -- always false):

- for sgiioc4_init_hwif_ports(), its caller, sgiioc4_ide_setup_pci_device(),
  guarantees that 'ctrl_port' and 'irq_port' parameters are never 0;

- in sgiioc4_read_status(), we always read the IDE status register, so there's
  no need to check the register's address (must be a leftover from the times
  when this function implemented the INB() method);

- in ide_dma_sgiioc4(), 'dma_base' can never be 0 as IOC4_DMA_OFFSET is not 0.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 8581789352aa..f6c5ba07be71 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -101,11 +101,8 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = reg + i * 4;
 
-	if (ctrl_port)
-		hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq_port)
-		hw->io_ports.irq_addr = irq_port;
+	hw->io_ports.ctl_addr = ctrl_port;
+	hw->io_ports.irq_addr = irq_port;
 }
 
 static int
@@ -303,16 +300,14 @@ static u8 sgiioc4_read_status(ide_hwif_t *hwif)
 	unsigned long port = hwif->io_ports.status_addr;
 	u8 reg = (u8) readb((void __iomem *) port);
 
-	if ((port & 0xFFF) == 0x11C) {	/* Status register of IOC4 */
-		if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */
-			unsigned long other_ir = port - 0x110;
-			unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
+	if (!(reg & ATA_BUSY)) {	/* Not busy... check for interrupt */
+		unsigned long other_ir = port - 0x110;
+		unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
 
-			/* Clear the Interrupt, Error bits on the IOC4 */
-			if (intr_reg & 0x03) {
-				writel(0x03, (void __iomem *) other_ir);
-				intr_reg = (u32) readl((void __iomem *) other_ir);
-			}
+		/* Clear the Interrupt, Error bits on the IOC4 */
+		if (intr_reg & 0x03) {
+			writel(0x03, (void __iomem *) other_ir);
+			intr_reg = (u32) readl((void __iomem *) other_ir);
 		}
 	}
 
@@ -329,9 +324,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 	int num_ports = sizeof (ioc4_dma_regs_t);
 	void *pad;
 
-	if (dma_base == 0)
-		return -1;
-
 	printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
 
 	if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) {
-- 
cgit v1.2.3-55-g7522


From 107111d450541df8c2a8d3af1d538cc7cd85e81b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:17 +0200
Subject: sgiioc4: kill duplicate ioremap()

By the time ide_dma_sgiioc4() gets called, sgiioc4_ide_setup_pci_device() will
have called ioremap() on the whole BAR0 region, so calling ioremap() on the DMA
registers means wasting a page. Replace this call by a mere address calculation,
based on the fact that IRQ registers (pointed to by 'hwif->io_ports.irq_addr')
are situated at offset 0 from BAR0.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: jeremy@sgi.com
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/sgiioc4.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index f6c5ba07be71..8af9b23499fd 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -320,7 +320,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET;
-	void __iomem *virt_dma_base;
 	int num_ports = sizeof (ioc4_dma_regs_t);
 	void *pad;
 
@@ -333,14 +332,8 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 		return -1;
 	}
 
-	virt_dma_base = ioremap(dma_base, num_ports);
-	if (virt_dma_base == NULL) {
-		printk(KERN_ERR "%s(%s) -- ERROR: unable to map addresses "
-		       "0x%lx to 0x%lx\n", __func__, hwif->name,
-		       dma_base, dma_base + num_ports - 1);
-		goto dma_remap_failure;
-	}
-	hwif->dma_base = (unsigned long) virt_dma_base;
+	hwif->dma_base = (unsigned long)hwif->io_ports.irq_addr +
+			 IOC4_DMA_OFFSET;
 
 	hwif->sg_max_nents = IOC4_PRD_ENTRIES;
 
@@ -364,9 +357,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 	printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name);
 
 dma_pci_alloc_failure:
-	iounmap(virt_dma_base);
-
-dma_remap_failure:
 	release_mem_region(dma_base, num_ports);
 
 	return -1;
-- 
cgit v1.2.3-55-g7522


From a11e2afa77b9e1ddffc63e37cae5685683558870 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov
Date: Fri, 17 Oct 2008 18:09:17 +0200
Subject: scc_pata: kill unused variables

Fix the "unused variable" warning in init_hwif_scc() caused by the commit
48c3c1072651922ed153bcf0a33ea82cf20df390 (ide: add struct ide_host (take 3)).
Moreover, remove the write-only variable 'dma_status_port' in init_setup_scc()
about which gcc gives no warning and which has been there from the very start...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/scc_pata.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 9ce1d8059921..49f163aa51e3 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -617,7 +617,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	unsigned long intmask_port;
 	unsigned long mode_port;
 	unsigned long ecmode_port;
-	unsigned long dma_status_port;
 	u32 reg = 0;
 	struct scc_ports *ports;
 	int rc;
@@ -637,7 +636,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
 	intmask_port = dma_base + 0x010;
 	mode_port = ctl_base + 0x024;
 	ecmode_port = ctl_base + 0xf00;
-	dma_status_port = dma_base + 0x004;
 
 	/* controller initialization */
 	reg = 0;
@@ -843,8 +841,6 @@ static u8 scc_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_scc(ide_hwif_t *hwif)
 {
-	struct scc_ports *ports = ide_get_hwifdata(hwif);
-
 	/* PTERADD */
 	out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
 
-- 
cgit v1.2.3-55-g7522


From 769b49ce68386b21e45bb6e573b63c02020b17a1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz
Date: Fri, 17 Oct 2008 18:09:18 +0200
Subject: ide: re-add TRM290 fix lost during ide_build_dmatable() cleanup

commit 14c123f37187aba0b4e0e893a969efc6820c4170 ("ide: cleanup
ide_build_dmatable()") accidentally reverted TRM290 fix introduced
by commit 22e05b4549bf2405d6aca128540b20cd2dd33f1f ("ide-dma: fix
ide_build_dmatable() for TRM290").

Reported-by: Sergei Shtylylov <sshtylyov@ru.mvista.com>
Acked-by: Sergei Shtylylov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma-sff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index 0903782689e9..cac431f0df17 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -130,7 +130,7 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 			xcount = bcount & 0xffff;
 			if (is_trm290)
 				xcount = ((xcount >> 2) - 1) << 16;
-			if (xcount == 0x0000) {
+			else if (xcount == 0x0000) {
 				if (count++ >= PRD_ENTRIES)
 					goto use_pio_instead;
 				*table++ = cpu_to_le32(0x8000);
-- 
cgit v1.2.3-55-g7522


From 719254faa17ffedc87ba0fadb9b34e535c9758d5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 17 Oct 2008 09:59:47 +0200
Subject: NOHZ: unify the nohz function calls in irq_enter()

We have two separate nohz function calls in irq_enter() for no good
reason. Just call a single NOHZ function from irq_enter() and call
the bits in the tick code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/tick.h     |  7 +++----
 kernel/softirq.c         | 10 +++-------
 kernel/time/tick-sched.c | 13 ++++++++++++-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 98921a3e1aa8..b6ec8189ac0c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
+extern void tick_check_idle(int cpu);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 # endif
 
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -106,26 +108,23 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
 extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
-extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
-extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
-static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
 	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
 
 	return len;
 }
-static inline void tick_nohz_stop_idle(int cpu) { }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 37d67aa2d56f..d410014279e7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -265,16 +265,12 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-#ifdef CONFIG_NO_HZ
 	int cpu = smp_processor_id();
+
 	if (idle_cpu(cpu) && !in_interrupt())
-		tick_nohz_stop_idle(cpu);
-#endif
+		tick_check_idle(cpu);
+
 	__irq_enter();
-#ifdef CONFIG_NO_HZ
-	if (idle_cpu(cpu))
-		tick_nohz_update_jiffies();
-#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b711ffcb106c..fdcf3f93bb8d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void)
 	touch_softlockup_watchdog();
 }
 
-void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 
@@ -558,6 +558,17 @@ static inline void tick_nohz_switch_to_nohz(void) { }
 
 #endif /* NO_HZ */
 
+/*
+ * Called from irq_enter to notify about the possible interruption of idle()
+ */
+void tick_check_idle(int cpu)
+{
+#ifdef CONFIG_NO_HZ
+	tick_nohz_stop_idle(cpu);
+	tick_nohz_update_jiffies();
+#endif
+}
+
 /*
  * High resolution timer specific code
  */
-- 
cgit v1.2.3-55-g7522


From c34bec5a44e9486597d78e7a686b2f9088a0564c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 17 Oct 2008 10:04:34 +0200
Subject: NOHZ: split tick_nohz_restart_sched_tick()

Split out the clock event device reprogramming. Preparatory
patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 49 ++++++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fdcf3f93bb8d..7aedf4343539 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+	hrtimer_cancel(&ts->sched_timer);
+	ts->sched_timer.expires = ts->idle_tick;
+
+	while (1) {
+		/* Forward the time to expire in the future */
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer,
+				      ts->sched_timer.expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				break;
+		} else {
+			if (!tick_program_event(ts->sched_timer.expires, 0))
+				break;
+		}
+		/* Update jiffies and reread time */
+		tick_do_update_jiffies64(now);
+		now = ktime_get();
+	}
+}
+
 /**
  * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
  *
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	ts->tick_stopped  = 0;
 	ts->idle_exittime = now;
-	hrtimer_cancel(&ts->sched_timer);
-	ts->sched_timer.expires = ts->idle_tick;
-
-	while (1) {
-		/* Forward the time to expire in the future */
-		hrtimer_forward(&ts->sched_timer, now, tick_period);
-
-		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-			hrtimer_start(&ts->sched_timer,
-				      ts->sched_timer.expires,
-				      HRTIMER_MODE_ABS);
-			/* Check, if the timer was already in the past */
-			if (hrtimer_active(&ts->sched_timer))
-				break;
-		} else {
-			if (!tick_program_event(ts->sched_timer.expires, 0))
-				break;
-		}
-		/* Update jiffies and reread time */
-		tick_do_update_jiffies64(now);
-		now = ktime_get();
-	}
+	tick_nohz_restart(ts, now);
 	local_irq_enable();
 }
 
-- 
cgit v1.2.3-55-g7522


From fb02fbc14d17837b4b7b02dbb36142c16a7bf208 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 17 Oct 2008 10:01:23 +0200
Subject: NOHZ: restart tick device from irq_enter()

We did not restart the tick device from irq_enter() to avoid double
reprogramming and extra events in the return immediate to idle case.

But long lasting softirqs can lead to a situation where jiffies become
stale:

idle()
  tick stopped (reprogrammed to next pending timer)
  halt()
   interrupt
     jiffies updated from irq_enter()
     interrupt handler
     softirq function 1 runs 20ms
     softirq function 2 arms a 10ms timer with a stale jiffies value
     jiffies updated from irq_exit()
     timer wheel has now an already expired timer
     (the one added in function 2)
     timer fires and timer softirq runs

This was discovered when debugging a timer problem which happend only
when the ath5k driver is active. The debugging proved that there is a
softirq function running for more than 20ms, which is a bug by itself.

To solve this we restart the tick timer right from irq_enter(), but do
not go through the other functions which are necessary to return from
idle when need_resched() is set.

Reported-by: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Elias Oltmanns <eo@nebensachen.de>
---
 kernel/time/tick-broadcast.c | 13 +++++++++++++
 kernel/time/tick-internal.h  |  2 ++
 kernel/time/tick-sched.c     | 31 +++++++++++++++++++++++--------
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index cb01cd8f919b..f98a1b7b16e9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -383,6 +383,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 	return 0;
 }
 
+/*
+ * Called from irq_enter() when idle was interrupted to reenable the
+ * per cpu device.
+ */
+void tick_check_oneshot_broadcast(int cpu)
+{
+	if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+
+		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+	}
+}
+
 /*
  * Handle oneshot mode broadcasting
  */
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 469248782c23..b1c05bf75ee0 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
+extern void tick_check_oneshot_broadcast(int cpu);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline void tick_check_oneshot_broadcast(int cpu) { }
 # endif /* !BROADCAST */
 
 #else /* !ONESHOT */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7aedf4343539..0581c11fe6c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -508,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING);
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return;
-
 	while (tick_nohz_reprogram(ts, now)) {
 		now = ktime_get();
 		tick_do_update_jiffies64(now);
@@ -557,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void)
 	       smp_processor_id());
 }
 
+/*
+ * When NOHZ is enabled and the tick is stopped, we need to kick the
+ * tick timer from irq_enter() so that the jiffies update is kept
+ * alive during long running softirqs. That's ugly as hell, but
+ * correctness is key even if we need to fix the offending softirq in
+ * the first place.
+ *
+ * Note, this is different to tick_nohz_restart. We just kick the
+ * timer and do not touch the other magic bits which need to be done
+ * when idle is left.
+ */
+static void tick_nohz_kick_tick(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (!ts->tick_stopped)
+		return;
+
+	tick_nohz_restart(ts, ktime_get());
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
@@ -568,9 +585,11 @@ static inline void tick_nohz_switch_to_nohz(void) { }
  */
 void tick_check_idle(int cpu)
 {
+	tick_check_oneshot_broadcast(cpu);
 #ifdef CONFIG_NO_HZ
 	tick_nohz_stop_idle(cpu);
 	tick_nohz_update_jiffies();
+	tick_nohz_kick_tick(cpu);
 #endif
 }
 
@@ -627,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 		profile_tick(CPU_PROFILING);
 	}
 
-	/* Do not restart, when we are in the idle loop */
-	if (ts->tick_stopped)
-		return HRTIMER_NORESTART;
-
 	hrtimer_forward(timer, now, tick_period);
 
 	return HRTIMER_RESTART;
-- 
cgit v1.2.3-55-g7522


From 81e192d6ce303b6792aa38ff35f41a1a7357f23a Mon Sep 17 00:00:00 2001
From: Helge Deller
Date: Fri, 17 Oct 2008 18:48:36 +0000
Subject: parisc: convert to generic compat_sys_ptrace

This patch does the compat_sys_ptrace conversion for parisc.
In addition it does convert the parisc ptrace code to use the
architecture-independent ptrace infrastructure instead of own coding.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
---
 arch/parisc/include/asm/ptrace.h   |  10 +
 arch/parisc/kernel/ptrace.c        | 429 +++++++++++++++----------------------
 arch/parisc/kernel/syscall_table.S |   2 +-
 3 files changed, 183 insertions(+), 258 deletions(-)

diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff5..afa5333187b4 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,6 +47,16 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step()	1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step()	1
+void user_enable_block_step(struct task_struct *task);
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 49c637970789..90904f9dfc50 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
  * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
  * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/kernel.h>
@@ -27,15 +28,149 @@
 /* PSW bits we allow the debugger to modify */
 #define USER_PSW_BITS	(PSW_N | PSW_V | PSW_CB)
 
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 
-#ifdef DEBUG_PTRACE
-#define DBG(x...)	printk(x)
-#else
-#define DBG(x...)
-#endif
+	/* make sure the trap bits are not set */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_BLOCKSTEP;
+	task->ptrace |= PT_SINGLESTEP;
+
+	if (pa_psw(task)->n) {
+		struct siginfo si;
+
+		/* Nullified, just crank over the queue. */
+		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+		pa_psw(task)->n = 0;
+		pa_psw(task)->x = 0;
+		pa_psw(task)->y = 0;
+		pa_psw(task)->z = 0;
+		pa_psw(task)->b = 0;
+		ptrace_disable(task);
+		/* Don't wake up the task, but let the
+		   parent know something happened. */
+		si.si_code = TRAP_TRACE;
+		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+		si.si_signo = SIGTRAP;
+		si.si_errno = 0;
+		force_sig_info(SIGTRAP, &si, task);
+		/* notify_parent(task, SIGCHLD); */
+		return;
+	}
+
+	/* Enable recovery counter traps.  The recovery counter
+	 * itself will be set to zero on a task switch.  If the
+	 * task is suspended on a syscall then the syscall return
+	 * path will overwrite the recovery counter with a suitable
+	 * value such that it traps once back in user space.  We
+	 * disable interrupts in the tasks PSW here also, to avoid
+	 * interrupts while the recovery counter is decrementing.
+	 */
+	pa_psw(task)->r = 1;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_SINGLESTEP;
+	task->ptrace |= PT_BLOCKSTEP;
+
+	/* Enable taken branch trap. */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 1;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	long ret = -EIO;
 
-#ifdef CONFIG_64BIT
+	switch (request) {
+
+	/* Read the word at location addr in the USER area.  For ptraced
+	   processes, the kernel saves all regs on a syscall. */
+	case PTRACE_PEEKUSR:
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+
+	/* Write the word at location addr in the USER area.  This will need
+	   to change when the kernel no longer saves all regs on a syscall.
+	   FIXME.  There is a problem at the moment in that r3-r18 are only
+	   saved if the process is ptraced on syscall entry, and even then
+	   those values are overwritten by actual register values on syscall
+	   exit. */
+	case PTRACE_POKEUSR:
+		/* Some register values written here may be ignored in
+		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+		 * r31/r31+4, and not with the values in pt_regs.
+		 */
+		if (addr == PT_PSW) {
+			/* Allow writing to Nullify, Divide-step-correction,
+			 * and carry/borrow bits.
+			 * BEWARE, if you set N, and then single step, it won't
+			 * stop on the nullified instruction.
+			 */
+			data &= USER_PSW_BITS;
+			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+			task_regs(child)->gr[0] |= data;
+			ret = 0;
+			break;
+		}
+
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+				addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+				(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+				addr == PT_SAR) {
+			*(unsigned long *) ((char *) task_regs(child) + addr) = data;
+			ret = 0;
+		}
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
 
 /* This function is needed to translate 32 bit pt_regs offsets in to
  * 64 bit pt_regs offsets.  For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
 	else
 		return -1;
 }
-#endif
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t addr, compat_ulong_t data)
 {
-	/* make sure the trap bits are not set */
-	pa_psw(child)->r = 0;
-	pa_psw(child)->t = 0;
-	pa_psw(child)->h = 0;
-	pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	long ret;
-#ifdef DEBUG_PTRACE
-	long oaddr=addr, odata=data;
-#endif
+	compat_uint_t tmp;
+	long ret = -EIO;
 
 	switch (request) {
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
-	case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			int copied;
-			unsigned int tmp;
-
-			addr &= 0xffffffffL;
-			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-			ret = -EIO;
-			if (copied != sizeof(tmp))
-				goto out_tsk;
-			ret = put_user(tmp,(unsigned int *) data);
-			DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
-				request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata, ret, tmp);
-		}
-		else
-#endif
-			ret = generic_ptrace_peekdata(child, addr, data);
-		goto out_tsk;
-	}
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = 0;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp = (unsigned int)data;
-			DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
-				request == PTRACE_POKETEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata);
-			addr &= 0xffffffffL;
-			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
-				goto out_tsk;
-		}
-		else
-#endif
-		{
-			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
-				goto out_tsk;
-		}
-		ret = -EIO;
-		goto out_tsk;
-
-	/* Read the word at location addr in the USER area.  For ptraced
-	   processes, the kernel saves all regs on a syscall. */
-	case PTRACE_PEEKUSR: {
-		ret = -EIO;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp;
-
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-
-			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned int *) data);
-			DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
-				pid, oaddr, odata, ret, addr, tmp);
-		}
-		else
-#endif
-		{
-			unsigned long tmp;
+	case PTRACE_PEEKUSR:
+		if (addr & (sizeof(compat_uint_t)-1))
+			break;
+		addr = translate_usr_offset(addr);
+		if (addr < 0)
+			break;
 
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned long *) data);
-		}
-		goto out_tsk;
-	}
+		tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+		break;
 
 	/* Write the word at location addr in the USER area.  This will need
 	   to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	   those values are overwritten by actual register values on syscall
 	   exit. */
 	case PTRACE_POKEUSR:
-		ret = -EIO;
 		/* Some register values written here may be ignored in
 		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
 		 * r31/r31+4, and not with the values in pt_regs.
 		 */
-		 /* PT_PSW=0, so this is valid for 32 bit processes under 64
-		 * bit kernels.
-		 */
 		if (addr == PT_PSW) {
-			/* PT_PSW=0, so this is valid for 32 bit processes
-			 * under 64 bit kernels.
-			 *
-			 * Allow writing to Nullify, Divide-step-correction,
-			 * and carry/borrow bits.
-			 * BEWARE, if you set N, and then single step, it won't
-			 * stop on the nullified instruction.
+			/* Since PT_PSW==0, it is valid for 32 bit processes
+			 * under 64 bit kernels as well.
 			 */
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
-				pid, oaddr, odata);
-			data &= USER_PSW_BITS;
-			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
-			task_regs(child)->gr[0] |= data;
-			ret = 0;
-			goto out_tsk;
-		}
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
-				pid, oaddr, odata, addr);
+			ret = arch_ptrace(child, request, addr, data);
+		} else {
+			if (addr & (sizeof(compat_uint_t)-1))
+				break;
+			addr = translate_usr_offset(addr);
+			if (addr < 0)
+				break;
 			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
 				/* Special case, fp regs are 64 bits anyway */
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u64 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
 			else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
 					addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
 					addr == PT_SAR+4) {
 				/* Zero the top 32 bits */
-				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+				*(__u32 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
-			goto out_tsk;
 		}
-		else
-#endif
-		{
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
-					addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
-					(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
-					addr == PT_SAR) {
-				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
-				ret = 0;
-			}
-			goto out_tsk;
-		}
-
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:
-		ret = -EIO;
-		DBG("sys_ptrace(%s)\n",
-			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-		if (!valid_signal(data))
-			goto out_tsk;
-		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-		if (request == PTRACE_SYSCALL) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		} else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}		
-		child->exit_code = data;
-		goto out_wake_notrap;
-
-	case PTRACE_KILL:
-		/*
-		 * make the child exit.  Best I can do is send it a
-		 * sigkill.  perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		ret = 0;
-		DBG("sys_ptrace(KILL)\n");
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			goto out_tsk;
-		child->exit_code = SIGKILL;
-		goto out_wake_notrap;
-
-	case PTRACE_SINGLEBLOCK:
-		DBG("sys_ptrace(SINGLEBLOCK)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_SINGLESTEP;
-		child->ptrace |= PT_BLOCKSTEP;
-		child->exit_code = data;
-
-		/* Enable taken branch trap. */
-		pa_psw(child)->r = 0;
-		pa_psw(child)->t = 1;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		goto out_wake;
-
-	case PTRACE_SINGLESTEP:
-		DBG("sys_ptrace(SINGLESTEP)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_BLOCKSTEP;
-		child->ptrace |= PT_SINGLESTEP;
-		child->exit_code = data;
-
-		if (pa_psw(child)->n) {
-			struct siginfo si;
-
-			/* Nullified, just crank over the queue. */
-			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
-			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
-			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
-			pa_psw(child)->n = 0;
-			pa_psw(child)->x = 0;
-			pa_psw(child)->y = 0;
-			pa_psw(child)->z = 0;
-			pa_psw(child)->b = 0;
-			ptrace_disable(child);
-			/* Don't wake up the child, but let the
-			   parent know something happened. */
-			si.si_code = TRAP_TRACE;
-			si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
-			si.si_signo = SIGTRAP;
-			si.si_errno = 0;
-			force_sig_info(SIGTRAP, &si, child);
-			//notify_parent(child, SIGCHLD);
-			//ret = 0;
-			goto out_wake;
-		}
-
-		/* Enable recovery counter traps.  The recovery counter
-		 * itself will be set to zero on a task switch.  If the
-		 * task is suspended on a syscall then the syscall return
-		 * path will overwrite the recovery counter with a suitable
-		 * value such that it traps once back in user space.  We
-		 * disable interrupts in the childs PSW here also, to avoid
-		 * interrupts while the recovery counter is decrementing.
-		 */
-		pa_psw(child)->r = 1;
-		pa_psw(child)->t = 0;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		/* give it a chance to run. */
-		goto out_wake;
-
-	case PTRACE_GETEVENTMSG:
-                ret = put_user(child->ptrace_message, (unsigned int __user *) data);
-		goto out_tsk;
+		break;
 
 	default:
-		ret = ptrace_request(child, request, addr, data);
-		goto out_tsk;
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
 	}
 
-out_wake_notrap:
-	ptrace_disable(child);
-out_wake:
-	wake_up_process(child);
-	ret = 0;
-out_tsk:
-	DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
-		request, pid, oaddr, odata, ret);
 	return ret;
 }
+#endif
+
 
 void syscall_trace(void)
 {
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 6084667eacf9..4ed01f2d655d 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -87,7 +87,7 @@
 	ENTRY_SAME(setuid)
 	ENTRY_SAME(getuid)
 	ENTRY_COMP(stime)		/* 25 */
-	ENTRY_SAME(ptrace)
+	ENTRY_COMP(ptrace)
 	ENTRY_SAME(alarm)
 	/* see stat comment */
 	ENTRY_COMP(newfstat)
-- 
cgit v1.2.3-55-g7522


From dd3a1db900f2a215a7d7dd71b836e149a6cf5fed Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Oct 2008 18:20:58 +0200
Subject: genirq: improve include files

Move the irq_desc related iterators out of irq.h, into irqnr.h, also
available via interrupt.h.

This way non-genirq (and even non-hardirq) architectures get the
common definitions and iterators.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  1 +
 include/linux/irq.h       | 20 +-------------------
 include/linux/irqnr.h     | 24 ++++++++++++++++++++++++
 3 files changed, 26 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/irqnr.h

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 58ff4e74b2f3..72fcfcff5637 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -8,6 +8,7 @@
 #include <linux/preempt.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/irqflags.h>
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0618fb362cb4..d058c57be02d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -11,25 +11,6 @@
 
 #include <linux/smp.h>
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-# define nr_irqs		NR_IRQS
-
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0; irq < nr_irqs; irq++)
-#else
-extern int nr_irqs;
-
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-
-# define for_each_irq_desc_reverse(irq, desc)			\
-	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
-	     irq > 0; irq--, desc--)
-#endif
-
-#define for_each_irq_nr(irq)			\
-	for (irq = 0; irq < nr_irqs; irq++)
-
 #ifndef CONFIG_S390
 
 #include <linux/linkage.h>
@@ -37,6 +18,7 @@ extern int nr_irqs;
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/errno.h>
 
 #include <asm/irq.h>
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
new file mode 100644
index 000000000000..3171ddc3b39d
--- /dev/null
+++ b/include/linux/irqnr.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_IRQNR_H
+#define _LINUX_IRQNR_H
+
+#ifndef CONFIG_GENERIC_HARDIRQS
+#include <asm/irq.h>
+# define nr_irqs		NR_IRQS
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)			\
+	for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 );	\
+	     irq > 0; irq--, desc--)
+#endif
+
+#define for_each_irq_nr(irq)			\
+	for (irq = 0; irq < nr_irqs; irq++)
+
+#endif
-- 
cgit v1.2.3-55-g7522


From e67ef25a35b949561a9bd77693523ec94ab4a278 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 25 Sep 2008 23:50:23 +0200
Subject: timer_list: print real timer address

The current timer_list output prints the address of the on stack copy
of the active hrtimer instead of the hrtimer itself.

Print the address of the real timer instead.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a40e20fd0001..ec9ea6cadd85 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym)
 }
 
 static void
-print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
+print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
+	    int idx, u64 now)
 {
 #ifdef CONFIG_TIMER_STATS
 	char tmp[TASK_COMM_LEN + 1];
 #endif
 	SEQ_printf(m, " #%d: ", idx);
-	print_name_offset(m, timer);
+	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
 	SEQ_printf(m, ", S:%02lx", timer->state);
@@ -99,7 +100,7 @@ next_one:
 		tmp = *timer;
 		spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
-		print_timer(m, &tmp, i, now);
+		print_timer(m, timer, &tmp, i, now);
 		next++;
 		goto next_one;
 	}
-- 
cgit v1.2.3-55-g7522


From c5b77a3d3a716a5c61a1999d7f2a78e9c39fd1b0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 29 Sep 2008 17:31:41 +0200
Subject: timer_list: print cpu number of clockevents device

The per cpu clock events device output of timer_list lacks an
association of the device to the cpu which is annoying when looking at
the output of /proc/timer_list from a 128 way system. 

Add the CPU number info and mark the broadcast device in the device
list printout.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ec9ea6cadd85..5479c6e7a023 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -184,12 +184,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 static void
-print_tickdevice(struct seq_file *m, struct tick_device *td)
+print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
 	struct clock_event_device *dev = td->evtdev;
 
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
+	if (cpu < 0)
+		SEQ_printf(m, "Broadcast device\n");
+	else
+		SEQ_printf(m, "Per CPU device: %d\n", cpu);
 
 	SEQ_printf(m, "Clock Event Device: ");
 	if (!dev) {
@@ -223,7 +227,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	int cpu;
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	print_tickdevice(m, tick_get_broadcast_device());
+	print_tickdevice(m, tick_get_broadcast_device(), -1);
 	SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
 		   tick_get_broadcast_mask()->bits[0]);
 #ifdef CONFIG_TICK_ONESHOT
@@ -233,7 +237,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 	SEQ_printf(m, "\n");
 #endif
 	for_each_online_cpu(cpu)
-		   print_tickdevice(m, tick_get_device(cpu));
+		print_tickdevice(m, tick_get_device(cpu), cpu);
 	SEQ_printf(m, "\n");
 }
 #else
-- 
cgit v1.2.3-55-g7522


From 870e2a284567714335d125c390366dce882d726f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 29 Sep 2008 17:41:55 +0200
Subject: timer_list: add base address to clock base

The base address of a (per cpu) clock base is a useful debug info.
Add it and bump the version number of timer_lists.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer_list.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 5479c6e7a023..f6426911e35a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -110,6 +110,7 @@ next_one:
 static void
 print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
 {
+	SEQ_printf(m, "  .base:       %p\n", base);
 	SEQ_printf(m, "  .index:      %d\n",
 			base->index);
 	SEQ_printf(m, "  .resolution: %Lu nsecs\n",
@@ -249,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.3\n");
+	SEQ_printf(m, "Timer List Version: v0.4\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
-- 
cgit v1.2.3-55-g7522


From 24bdeb4598b9560c8ffecb8ba5cefa01f3a12a54 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Oct 2008 20:27:27 -0700
Subject: Fix documentation of sysrq-q

I fell into the trap recently that it only dumps hrtimers instead of
all timers. Fix the documentation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: torvalds@linux-foundation.org
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/sysrq.txt | 3 ++-
 drivers/char/sysrq.c    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 5ce0952aa065..49378a9f2b5f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,7 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running timers.
+'q'     - Will dump a list of all running hrtimers.
+	  WARNING: Does not cover any other timers
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index dce4cc0e6953..d0c0d64ed366 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show Pending Timers",
+	.action_msg	= "Show pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
-- 
cgit v1.2.3-55-g7522


From 322acf6585f3c4e82ee32a246b0483ca0f6ad3f4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 20 Oct 2008 12:33:14 +0200
Subject: fix documentation of sysrq-q really

SysRq-Q also dumps information about the clockevent devices.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/sysrq.txt | 4 ++--
 drivers/char/sysrq.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 49378a9f2b5f..7b3b069c376e 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,8 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running hrtimers.
-	  WARNING: Does not cover any other timers
+'q'     - Will dump per CPU lists of all armed hrtimers (not timer_list timers)
+	  and detailed information about all clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index d0c0d64ed366..ce0d9da52a8a 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
 	.handler	= sysrq_handle_show_timers,
 	.help_msg	= "show-all-timers(Q)",
-	.action_msg	= "Show pending hrtimers (no others)",
+	.action_msg	= "Show clockevent devices & pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
-- 
cgit v1.2.3-55-g7522


From edbc25caaa492a82e19baa915f1f6b0a0db6554d Mon Sep 17 00:00:00 2001
From: Milton Miller
Date: Thu, 10 Jul 2008 16:29:37 -0500
Subject: PCI: remove dynids.use_driver_data

The driver flag dynids.use_driver_data is almost consistently not set,
and causes more problems than it solves.  It was initially intended as a
flag to indicate whether a driver's usage of driver_data had been
carefully inspected and was ready for values from userspace.  That audit
was never done, so most drivers just get a 0 for driver_data when new
IDs are added from userspace via sysfs.  So remove the flag, allowing
drivers to see the data directly (a followon patch validates the passed
driver_data value against what the drivers expect).

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/i2c/busses/i2c-amd756.c | 1 -
 drivers/i2c/busses/i2c-viapro.c | 1 -
 drivers/pci/pci-driver.c        | 3 +--
 drivers/scsi/ipr.c              | 1 -
 include/linux/pci.h             | 1 -
 5 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index 1ea39254dac6..a3542b053c8e 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -412,7 +412,6 @@ static struct pci_driver amd756_driver = {
 	.id_table	= amd756_ids,
 	.probe		= amd756_probe,
 	.remove		= __devexit_p(amd756_remove),
-	.dynids.use_driver_data = 1,
 };
 
 static int __init amd756_init(void)
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 73dc52e114eb..2324780484c0 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -483,7 +483,6 @@ static struct pci_driver vt596_driver = {
 	.name		= "vt596_smbus",
 	.id_table	= vt596_ids,
 	.probe		= vt596_probe,
-	.dynids.use_driver_data = 1,
 };
 
 static int __init i2c_vt596_init(void)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a13f53486114..4940a53c56a3 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -65,8 +65,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 	dynid->id.subdevice = subdevice;
 	dynid->id.class = class;
 	dynid->id.class_mask = class_mask;
-	dynid->id.driver_data = pdrv->dynids.use_driver_data ?
-		driver_data : 0UL;
+	dynid->id.driver_data = driver_data;
 
 	spin_lock(&pdrv->dynids.lock);
 	list_add_tail(&dynid->node, &pdrv->dynids.list);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d30eb7ba018e..098739deb02e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7859,7 +7859,6 @@ static struct pci_driver ipr_driver = {
 	.remove = ipr_remove,
 	.shutdown = ipr_shutdown,
 	.err_handler = &ipr_err_handler,
-	.dynids.use_driver_data = 1
 };
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index acf8f24037cd..c989f58d09bf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -347,7 +347,6 @@ struct pci_bus_region {
 struct pci_dynids {
 	spinlock_t lock;            /* protects list, index */
 	struct list_head list;      /* for IDs added at runtime */
-	unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
 };
 
 /* ---------------------------------------------------------------- */
-- 
cgit v1.2.3-55-g7522


From b41d6cf38e27a940d998d989526a9748de1bf028 Mon Sep 17 00:00:00 2001
From: Jean Delvare
Date: Sun, 17 Aug 2008 21:06:59 +0200
Subject: PCI: Check dynids driver_data value for validity

Only accept dynids whose driver_data value matches one of the driver's
pci_driver_id entries. This prevents the user from accidentally passing
values the drivers do not expect.

Cc: Milton Miller <miltonm@bga.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/PCI/pci.txt       |  4 ++++
 drivers/i2c/busses/i2c-amd756.c |  4 ----
 drivers/i2c/busses/i2c-viapro.c |  4 ----
 drivers/pci/pci-driver.c        | 18 ++++++++++++++++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c58..fd4907a2968c 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
 	o class and classmask fields default to 0
 	o driver_data defaults to 0UL.
 
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
 Once added, the driver probe routine will be invoked for any unclaimed
 PCI devices listed in its (newly updated) pci_ids list.
 
diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index a3542b053c8e..424dad6f18d8 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -332,10 +332,6 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
 	int error;
 	u8 temp;
 	
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data >= ARRAY_SIZE(chipname))
-		return -EINVAL;
-
 	if (amd756_ioport) {
 		dev_err(&pdev->dev, "Only one device supported "
 		       "(you have a strange motherboard, btw)\n");
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 2324780484c0..9f194d9efd91 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -332,10 +332,6 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
 	unsigned char temp;
 	int error = -ENODEV;
 
-	/* driver_data might come from user-space, so check it */
-	if (id->driver_data & 1 || id->driver_data > 0xff)
-		return -EINVAL;
-
 	/* Determine the address of the SMBus areas */
 	if (force_addr) {
 		vt596_smba = force_addr & 0xfff0;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 4940a53c56a3..b4cdd690ae71 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -43,18 +43,32 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 {
 	struct pci_dynid *dynid;
 	struct pci_driver *pdrv = to_pci_driver(driver);
+	const struct pci_device_id *ids = pdrv->id_table;
 	__u32 vendor, device, subvendor=PCI_ANY_ID,
 		subdevice=PCI_ANY_ID, class=0, class_mask=0;
 	unsigned long driver_data=0;
 	int fields=0;
-	int retval = 0;
+	int retval;
 
-	fields = sscanf(buf, "%x %x %x %x %x %x %lux",
+	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
 			&vendor, &device, &subvendor, &subdevice,
 			&class, &class_mask, &driver_data);
 	if (fields < 2)
 		return -EINVAL;
 
+	/* Only accept driver_data values that match an existing id_table
+	   entry */
+	retval = -EINVAL;
+	while (ids->vendor || ids->subvendor || ids->class_mask) {
+		if (driver_data == ids->driver_data) {
+			retval = 0;
+			break;
+		}
+		ids++;
+	}
+	if (retval)	/* No match */
+		return retval;
+
 	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
 	if (!dynid)
 		return -ENOMEM;
-- 
cgit v1.2.3-55-g7522


From 3d137310245e4cdc3e8c8ba1bea2e145a87ae8e3 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni
Date: Tue, 19 Aug 2008 10:28:24 +0200
Subject: PCI: allow quirks to be compiled out

This patch adds the CONFIG_PCI_QUIRKS option which allows to remove all
the PCI quirks, which are not necessarily used on embedded systems when
PCI is working properly. As this is a size-reduction option, it depends
on CONFIG_EMBEDDED. It allows to save almost 12 kilobytes of kernel
code:

   text	   data	    bss	    dec	    hex	filename
1287806	 123596	 212992	1624394	 18c94a	vmlinux.old
1275854	 123596	 212992	1612442	 189a9a	vmlinux
 -11952       0       0  -11952   -2EB0 +/-

This patch has originally been written by Zwane Mwaikambo
<zwane@arm.linux.org.uk> and is part of the Linux Tiny project.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 176 ++++++++++++++++++++++++++-------------------------
 init/Kconfig         |   8 +++
 2 files changed, 98 insertions(+), 86 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e872ac925b4b..246918fe9482 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,14 @@
 #include <linux/kallsyms.h>
 #include "pci.h"
 
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+int pcie_mch_quirk;
+EXPORT_SYMBOL(pcie_mch_quirk);
+
+#ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
@@ -62,8 +70,6 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_p
     
     This appears to be BIOS not version dependent. So presumably there is a 
     chipset level fix */
-int isa_dma_bridge_buggy;
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
     
 static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
 {
@@ -84,9 +90,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_1,	quirk_isa_d
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_2,	quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_dma_hangs);
 
-int pci_pci_problems;
-EXPORT_SYMBOL(pci_pci_problems);
-
 /*
  *	Chipsets where PCI->PCI transfers vanish or hang
  */
@@ -1362,9 +1365,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
 #endif
 
-int pcie_mch_quirk;
-EXPORT_SYMBOL(pcie_mch_quirk);
-
 static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
 {
 	pcie_mch_quirk = 1;
@@ -1555,84 +1555,6 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
 
-static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
-{
-	while (f < end) {
-		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
- 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
-			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
-			f->hook(dev);
-		}
-		f++;
-	}
-}
-
-extern struct pci_fixup __start_pci_fixups_early[];
-extern struct pci_fixup __end_pci_fixups_early[];
-extern struct pci_fixup __start_pci_fixups_header[];
-extern struct pci_fixup __end_pci_fixups_header[];
-extern struct pci_fixup __start_pci_fixups_final[];
-extern struct pci_fixup __end_pci_fixups_final[];
-extern struct pci_fixup __start_pci_fixups_enable[];
-extern struct pci_fixup __end_pci_fixups_enable[];
-extern struct pci_fixup __start_pci_fixups_resume[];
-extern struct pci_fixup __end_pci_fixups_resume[];
-extern struct pci_fixup __start_pci_fixups_resume_early[];
-extern struct pci_fixup __end_pci_fixups_resume_early[];
-extern struct pci_fixup __start_pci_fixups_suspend[];
-extern struct pci_fixup __end_pci_fixups_suspend[];
-
-
-void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
-{
-	struct pci_fixup *start, *end;
-
-	switch(pass) {
-	case pci_fixup_early:
-		start = __start_pci_fixups_early;
-		end = __end_pci_fixups_early;
-		break;
-
-	case pci_fixup_header:
-		start = __start_pci_fixups_header;
-		end = __end_pci_fixups_header;
-		break;
-
-	case pci_fixup_final:
-		start = __start_pci_fixups_final;
-		end = __end_pci_fixups_final;
-		break;
-
-	case pci_fixup_enable:
-		start = __start_pci_fixups_enable;
-		end = __end_pci_fixups_enable;
-		break;
-
-	case pci_fixup_resume:
-		start = __start_pci_fixups_resume;
-		end = __end_pci_fixups_resume;
-		break;
-
-	case pci_fixup_resume_early:
-		start = __start_pci_fixups_resume_early;
-		end = __end_pci_fixups_resume_early;
-		break;
-
-	case pci_fixup_suspend:
-		start = __start_pci_fixups_suspend;
-		end = __end_pci_fixups_suspend;
-		break;
-
-	default:
-		/* stupid compiler warning, you would think with an enum... */
-		return;
-	}
-	pci_do_fixups(dev, start, end);
-}
-EXPORT_SYMBOL(pci_fixup_device);
-
 /* Enable 1k I/O space granularity on the Intel P64H2 */
 static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
 {
@@ -2006,3 +1928,85 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
 			quirk_msi_intx_disable_bug);
 
 #endif /* CONFIG_PCI_MSI */
+
+static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
+{
+	while (f < end) {
+		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+ 		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+#ifdef DEBUG
+			dev_dbg(&dev->dev, "calling ");
+			print_fn_descriptor_symbol("%s\n", f->hook);
+#endif
+			f->hook(dev);
+		}
+		f++;
+	}
+}
+
+extern struct pci_fixup __start_pci_fixups_early[];
+extern struct pci_fixup __end_pci_fixups_early[];
+extern struct pci_fixup __start_pci_fixups_header[];
+extern struct pci_fixup __end_pci_fixups_header[];
+extern struct pci_fixup __start_pci_fixups_final[];
+extern struct pci_fixup __end_pci_fixups_final[];
+extern struct pci_fixup __start_pci_fixups_enable[];
+extern struct pci_fixup __end_pci_fixups_enable[];
+extern struct pci_fixup __start_pci_fixups_resume[];
+extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
+
+
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
+{
+	struct pci_fixup *start, *end;
+
+	switch(pass) {
+	case pci_fixup_early:
+		start = __start_pci_fixups_early;
+		end = __end_pci_fixups_early;
+		break;
+
+	case pci_fixup_header:
+		start = __start_pci_fixups_header;
+		end = __end_pci_fixups_header;
+		break;
+
+	case pci_fixup_final:
+		start = __start_pci_fixups_final;
+		end = __end_pci_fixups_final;
+		break;
+
+	case pci_fixup_enable:
+		start = __start_pci_fixups_enable;
+		end = __end_pci_fixups_enable;
+		break;
+
+	case pci_fixup_resume:
+		start = __start_pci_fixups_resume;
+		end = __end_pci_fixups_resume;
+		break;
+
+	case pci_fixup_resume_early:
+		start = __start_pci_fixups_resume_early;
+		end = __end_pci_fixups_resume_early;
+		break;
+
+	case pci_fixup_suspend:
+		start = __start_pci_fixups_suspend;
+		end = __end_pci_fixups_suspend;
+		break;
+
+	default:
+		/* stupid compiler warning, you would think with an enum... */
+		return;
+	}
+	pci_do_fixups(dev, start, end);
+}
+#else
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
+#endif
+EXPORT_SYMBOL(pci_fixup_device);
diff --git a/init/Kconfig b/init/Kconfig
index 8828ed0b2051..06330a305249 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -737,6 +737,14 @@ config VM_EVENT_COUNTERS
 	  on EMBEDDED systems.  /proc/vmstat will only show page counts
 	  if VM event counters are disabled.
 
+config PCI_QUIRKS
+	default y
+	bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+	help
+	  This enables workarounds for various PCI chipset
+          bugs/quirks. Disable this only if your target machine is
+          unaffected by PCI quirks.
+
 config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EMBEDDED
-- 
cgit v1.2.3-55-g7522


From 0235c4fc7fc6f621dc0dd89eba102ad5aa373390 Mon Sep 17 00:00:00 2001
From: Rafael J. Wysocki
Date: Mon, 18 Aug 2008 21:38:00 +0200
Subject: PCI PM: Introduce function pci_wake_from_d3

Many device drivers use the following sequence of statements to enable
the device to wake up the system while being in the D3_hot or D3_cold
low power state:

        pci_enable_wake(pdev, PCI_D3hot, 1);
        pci_enable_wake(pdev, PCI_D3cold, 1);

However, the second call is not necessary if the first one succeeds (the
ordering of the statements above doesn't matter here) and it may even be
harmful, because we are not supposed to enable PME# after the wake-up
power has been enabled for the device.

To allow drivers to overcome this problem, introduce function
pci_wake_from_d3() that will enable the device to wake up the system
from any of D3_hot and D3_cold as long as the wake-up from at least one
of them is supported.

Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 22 ++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dbe9f39f4436..2797112c9400 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1126,6 +1126,27 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
 	return pme_done ? 0 : error;
 }
 
+/**
+ * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
+ * @dev: PCI device to prepare
+ * @enable: True to enable wake-up event generation; false to disable
+ *
+ * Many drivers want the device to wake up the system from D3_hot or D3_cold
+ * and this function allows them to set that up cleanly - pci_enable_wake()
+ * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
+ * ordering constraints.
+ *
+ * This function only returns error code if the device is not capable of
+ * generating PME# from both D3_hot and D3_cold, and the platform is unable to
+ * enable wake-up power for it.
+ */
+int pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	return pci_pme_capable(dev, PCI_D3cold) ?
+			pci_enable_wake(dev, PCI_D3cold, enable) :
+			pci_enable_wake(dev, PCI_D3hot, enable);
+}
+
 /**
  * pci_target_state - find an appropriate low power state for a given PCI dev
  * @dev: PCI device
@@ -1942,6 +1963,7 @@ EXPORT_SYMBOL(pci_restore_state);
 EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_pme_active);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_wake_from_d3);
 EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c989f58d09bf..f7e7dbc09194 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -644,6 +644,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
-- 
cgit v1.2.3-55-g7522


From 16dbef4a831782466b10d4ae56837c5ba17d1948 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 15 Aug 2008 19:36:45 -0700
Subject: PCI: change MSI-x vector to 32bit

We are using 28bit pci (bus/dev/fn + 12 bits) as irq number, so the
cache for irq number should be 32 bit too.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/scsi/qla2xxx/qla_def.h | 2 +-
 include/linux/pci.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c819216771..f25f41a499e5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
 	int have_irq;
-	uint16_t msix_vector;
+	uint32_t msix_vector;
 	uint16_t msix_entry;
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f7e7dbc09194..8a4d0bebc311 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -725,7 +725,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-	u16 	vector;	/* kernel uses to write allocated vector */
+	u32	vector;	/* kernel uses to write allocated vector */
 	u16	entry;	/* driver uses to specify entry, OS writes */
 };
 
-- 
cgit v1.2.3-55-g7522


From d768cb6929773060171eee8397a63883f60ddc07 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Mon, 25 Aug 2008 15:44:59 -0600
Subject: x86/PCI: follow lspci device/vendor style

Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format
used by lspci(8).

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 006599db0dc7..52a1de1128c1 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 	if (pirq <= 4)
 		irq = read_config_nybble(router, 0x56, pirq - 1);
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	return irq;
 }
@@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
 	dev_info(&dev->dev,
-		 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
+		 "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
 		 dev->vendor, dev->device, pirq, irq);
 	if (pirq <= 4)
 		write_config_nybble(router, 0x56, pirq - 1, irq);
@@ -823,7 +823,7 @@ static void __init pirq_find_router(struct irq_router *r)
 	r->get = NULL;
 	r->set = NULL;
 
-	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
+	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
 	    rt->rtr_vendor, rt->rtr_device);
 
 	pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
@@ -843,7 +843,7 @@ static void __init pirq_find_router(struct irq_router *r)
 			h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
-	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
+	dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
 		 pirq_router.name,
 		 pirq_router_dev->vendor, pirq_router_dev->device);
 
-- 
cgit v1.2.3-55-g7522


From 34a2e15e95fce6d6f4d30162f53a0ceb25d5bbaf Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Mon, 25 Aug 2008 15:45:20 -0600
Subject: PCI: follow lspci device/vendor style

Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format
used by lspci(8).

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/portdrv_pci.c | 2 +-
 drivers/pci/probe.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 367c9c20000d..584422da8d8b 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -91,7 +91,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
 	
 	pci_set_master(dev);
         if (!dev->irq && dev->pin) {
-		dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; "
+		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
 			 "check vendor BIOS\n", dev->vendor, dev->device);
 	}
 	if (pcie_port_device_register(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index d3db8b249729..578d15f49e02 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -762,7 +762,7 @@ static int pci_setup_device(struct pci_dev * dev)
 	dev->class = class;
 	class >>= 8;
 
-	dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n",
+	dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
 		 dev->vendor, dev->device, class, dev->hdr_type);
 
 	/* "Unknown power state" */
-- 
cgit v1.2.3-55-g7522


From a8d2dbd38481e0c35c6bdd8196dd38a42ae45d02 Mon Sep 17 00:00:00 2001
From: akpm@linux-foundation.org
Date: Fri, 22 Aug 2008 13:28:17 -0700
Subject: PCI: ibmphp: list_for_each to list_for_each_entry

Make code more readable with list_for_each_entry().

Signed-off-by: Cordelia Sam <cordesam@gmail.com>
Cc: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/ibmphp_ebda.c | 92 ++++++++++-----------------------------
 1 file changed, 23 insertions(+), 69 deletions(-)

diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 7d27631e6e62..1bc08a5b2c7d 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -123,10 +123,8 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
 static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
-	struct list_head *ptr1;
 	
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
 		debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -146,10 +144,8 @@ static void __init print_bus_info (void)
 static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("print_lo_info ----\n");	
-	list_for_each (ptr1, &rio_lo_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry (ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -163,10 +159,8 @@ static void print_lo_info (void)
 static void print_vg_info (void)
 {
 	struct rio_detail *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &rio_vg_head) {
-		ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+	list_for_each_entry (ptr, &rio_vg_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -180,10 +174,8 @@ static void print_vg_info (void)
 static void __init print_ebda_pci_rsrc (void)
 {
 	struct ebda_pci_rsrc *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_ebda_pci_rsrc_head) {
-		ptr = list_entry (ptr1, struct ebda_pci_rsrc, ebda_pci_rsrc_list);
+	list_for_each_entry (ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
 		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
@@ -192,10 +184,8 @@ static void __init print_ebda_pci_rsrc (void)
 static void __init print_ibm_slot (void)
 {
 	struct slot *ptr;
-	struct list_head *ptr1;
 
-	list_for_each (ptr1, &ibmphp_slot_head) {
-		ptr = list_entry (ptr1, struct slot, ibm_slot_list);
+	list_for_each_entry (ptr, &ibmphp_slot_head, ibm_slot_list) {
 		debug ("%s - slot_number: %x\n", __func__, ptr->number);
 	}
 }
@@ -203,10 +193,8 @@ static void __init print_ibm_slot (void)
 static void __init print_opt_vg (void)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
 	debug ("%s ---\n", __func__);
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
 		debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
 		debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
 		debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -217,13 +205,9 @@ static void __init print_opt_vg (void)
 static void __init print_ebda_hpc (void)
 {
 	struct controller *hpc_ptr;
-	struct list_head *ptr1;
 	u16 index;
 
-	list_for_each (ptr1, &ebda_hpc_head) {
-
-		hpc_ptr = list_entry (ptr1, struct controller, ebda_hpc_list); 
-
+	list_for_each_entry (hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
 			debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
 			debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -460,9 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
 	struct opt_rio *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_vg_head) {
-		ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -473,10 +455,8 @@ static int __init combine_wpg_for_chassis (void)
 {
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_vg_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry (rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
 			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -497,14 +477,12 @@ static int __init combine_wpg_for_chassis (void)
 }	
 
 /*
- * reorgnizing linked list of expansion box	 
+ * reorganizing linked list of expansion box
  */
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
 	struct opt_rio_lo *ptr;
-	struct list_head *ptr1;
-	list_for_each (ptr1, &opt_lo_head) {
-		ptr = list_entry (ptr1, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry (ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -515,10 +493,8 @@ static int combine_wpg_for_expansion (void)
 {
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
-	struct list_head *list_head_ptr = NULL;
 	
-	list_for_each (list_head_ptr, &rio_lo_head) {
-		rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+	list_for_each_entry (rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
 			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -550,20 +526,17 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 {
 	struct opt_rio *opt_vg_ptr = NULL;
 	struct opt_rio_lo *opt_lo_ptr = NULL;
-	struct list_head *ptr = NULL;
 	int rc = 0;
 
 	if (!var) {
-		list_for_each (ptr, &opt_vg_head) {
-			opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+		list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
 				rc = -ENODEV;
 				break;
 			}
 		}
 	} else {
-		list_for_each (ptr, &opt_lo_head) {
-			opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+		list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
@@ -576,10 +549,8 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_lo_head) {
-		opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+	list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
 		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
 			return opt_lo_ptr;
@@ -590,10 +561,8 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 static struct opt_rio * find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
-	struct list_head *ptr;
 
-	list_for_each (ptr, &opt_vg_head) {
-		opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+	list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 		//check to see if this slot_num belongs to chassis 
 		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
 			return opt_vg_ptr;
@@ -607,11 +576,9 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 static u8 calculate_first_slot (u8 slot_num)
 {
 	u8 first_slot = 1;
-	struct list_head * list;
 	struct slot * slot_cur;
 	
-	list_for_each (list, &ibmphp_slot_head) {
-		slot_cur = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry (slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
 			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
 				first_slot = slot_cur->ctrl->ending_slot_num;
@@ -767,7 +734,6 @@ static int __init ebda_rsrc_controller (void)
 	struct bus_info *bus_info_ptr1, *bus_info_ptr2;
 	int rc;
 	struct slot *tmp_slot;
-	struct list_head *list;
 
 	addr = hpc_list_ptr->phys_addr;
 	for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
@@ -997,9 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
 	}			/* each hpc  */
 
-	list_for_each (list, &ibmphp_slot_head) {
-		tmp_slot = list_entry (list, struct slot, ibm_slot_list);
-
+	list_for_each_entry (tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
 		pci_hp_register(tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1101,10 +1065,8 @@ u16 ibmphp_get_total_controllers (void)
 struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
 	struct slot *slot;
-	struct list_head *list;
 
-	list_for_each (list, &ibmphp_slot_head) {
-		slot = list_entry (list, struct slot, ibm_slot_list);
+	list_for_each_entry (slot, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot->number == physical_num)
 			return slot;
 	}
@@ -1120,10 +1082,8 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list); 
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num) 
 			 return ptr;
 	}
@@ -1136,10 +1096,8 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 int ibmphp_get_bus_index (u8 num)
 {
 	struct bus_info *ptr;
-	struct list_head  *ptr1;
 
-	list_for_each (ptr1, &bus_info_head) {
-		ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num)  
 			return ptr->index;
 	}
@@ -1212,11 +1170,9 @@ static struct pci_driver ibmphp_driver = {
 int ibmphp_register_pci (void)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 	int rc = 0;
 
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			rc = pci_register_driver(&ibmphp_driver);
 			break;
@@ -1227,12 +1183,10 @@ int ibmphp_register_pci (void)
 static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 {
 	struct controller *ctrl;
-	struct list_head *tmp;
 
 	debug ("inside ibmphp_probe\n");
 	
-	list_for_each (tmp, &ebda_hpc_head) {
-		ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
 				ctrl->ctrl_dev = dev;
-- 
cgit v1.2.3-55-g7522


From 2fd39aa7c2f3d696814abb3e8962c759eee484f3 Mon Sep 17 00:00:00 2001
From: akpm@linux-foundation.org
Date: Fri, 22 Aug 2008 13:30:14 -0700
Subject: PCI: ibmphp: list_for_each to list_for_each_entry-checkpatch cleanups

Please run checkpatch prior to sending patches, this one fixes several style
issues with the list_for_each conversion patch.

Cc: Cordelia Sam <cordesam@gmail.com>
Cc: Cordelia Sam <cordsam@linux.vnet.ibm.com>
Cc: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/ibmphp_ebda.c | 44 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 1bc08a5b2c7d..8cfd1c4926c8 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -124,7 +124,7 @@ static void __init print_bus_info (void)
 {
 	struct bus_info *ptr;
 	
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
 		debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
 		debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -145,7 +145,7 @@ static void print_lo_info (void)
 {
 	struct rio_detail *ptr;
 	debug ("print_lo_info ----\n");	
-	list_for_each_entry (ptr, &rio_lo_head, rio_detail_list) {
+	list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -160,7 +160,7 @@ static void print_vg_info (void)
 {
 	struct rio_detail *ptr;
 	debug ("%s ---\n", __func__);
-	list_for_each_entry (ptr, &rio_vg_head, rio_detail_list) {
+	list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
 		debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
 		debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
 		debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -175,7 +175,7 @@ static void __init print_ebda_pci_rsrc (void)
 {
 	struct ebda_pci_rsrc *ptr;
 
-	list_for_each_entry (ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
+	list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
 		debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
 			__func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
 	}
@@ -185,7 +185,7 @@ static void __init print_ibm_slot (void)
 {
 	struct slot *ptr;
 
-	list_for_each_entry (ptr, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
 		debug ("%s - slot_number: %x\n", __func__, ptr->number);
 	}
 }
@@ -194,7 +194,7 @@ static void __init print_opt_vg (void)
 {
 	struct opt_rio *ptr;
 	debug ("%s ---\n", __func__);
-	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
 		debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
 		debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -207,7 +207,7 @@ static void __init print_ebda_hpc (void)
 	struct controller *hpc_ptr;
 	u16 index;
 
-	list_for_each_entry (hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
 			debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
 			debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -444,7 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
 	struct opt_rio *ptr;
-	list_for_each_entry (ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -456,7 +456,7 @@ static int __init combine_wpg_for_chassis (void)
 	struct opt_rio *opt_rio_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
 	
-	list_for_each_entry (rio_detail_ptr, &rio_vg_head, rio_detail_list) {
+	list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
 		opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
 		if (!opt_rio_ptr) {
 			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -482,7 +482,7 @@ static int __init combine_wpg_for_chassis (void)
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
 	struct opt_rio_lo *ptr;
-	list_for_each_entry (ptr, &opt_lo_head, opt_rio_lo_list) {
+	list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
 		if (ptr->chassis_num == chassis_num)
 			return ptr;
 	}		
@@ -494,7 +494,7 @@ static int combine_wpg_for_expansion (void)
 	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
 	struct rio_detail *rio_detail_ptr = NULL;
 	
-	list_for_each_entry (rio_detail_ptr, &rio_lo_head, rio_detail_list) {
+	list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
 		opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
 		if (!opt_rio_lo_ptr) {
 			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -529,14 +529,14 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 	int rc = 0;
 
 	if (!var) {
-		list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+		list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
 				rc = -ENODEV;
 				break;
 			}
 		}
 	} else {
-		list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+		list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
 				rc = -ENODEV;
 				break;
@@ -550,7 +550,7 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
 	struct opt_rio_lo *opt_lo_ptr;
 
-	list_for_each_entry (opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+	list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
 		//check to see if this slot_num belongs to expansion box
 		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
 			return opt_lo_ptr;
@@ -562,7 +562,7 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 {
 	struct opt_rio *opt_vg_ptr;
 
-	list_for_each_entry (opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+	list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
 		//check to see if this slot_num belongs to chassis 
 		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
 			return opt_vg_ptr;
@@ -578,7 +578,7 @@ static u8 calculate_first_slot (u8 slot_num)
 	u8 first_slot = 1;
 	struct slot * slot_cur;
 	
-	list_for_each_entry (slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot_cur->ctrl) {
 			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
 				first_slot = slot_cur->ctrl->ending_slot_num;
@@ -963,7 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
 	}			/* each hpc  */
 
-	list_for_each_entry (tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
 		pci_hp_register(tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1066,7 +1066,7 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
 	struct slot *slot;
 
-	list_for_each_entry (slot, &ibmphp_slot_head, ibm_slot_list) {
+	list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
 		if (slot->number == physical_num)
 			return slot;
 	}
@@ -1083,7 +1083,7 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
 	struct bus_info *ptr;
 
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num) 
 			 return ptr;
 	}
@@ -1097,7 +1097,7 @@ int ibmphp_get_bus_index (u8 num)
 {
 	struct bus_info *ptr;
 
-	list_for_each_entry (ptr, &bus_info_head, bus_info_list) {
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
 		if (ptr->busno == num)  
 			return ptr->index;
 	}
@@ -1172,7 +1172,7 @@ int ibmphp_register_pci (void)
 	struct controller *ctrl;
 	int rc = 0;
 
-	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			rc = pci_register_driver(&ibmphp_driver);
 			break;
@@ -1186,7 +1186,7 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 
 	debug ("inside ibmphp_probe\n");
 	
-	list_for_each_entry (ctrl, &ebda_hpc_head, ebda_hpc_list) {
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
 		if (ctrl->ctlr_type == 1) {
 			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
 				ctrl->ctrl_dev = dev;
-- 
cgit v1.2.3-55-g7522


From c9ed77eeba8ec2541a40918210bcc676acacd43a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Fri, 22 Aug 2008 09:37:02 -0600
Subject: PCI: tidy PME support messages

This patch changes these two messages:

    pci 0000:00:03.0: supports D1
    pci 0000:00:03.0: supports D2

to this:

    pci 0000:00:03.0: supports D1 D2

It also trivially converts a "dev_printk(KERN_INFO, ...)" to
"dev_info(...)".

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2797112c9400..e1a17b85447b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1263,25 +1263,25 @@ void pci_pm_init(struct pci_dev *dev)
 	dev->d1_support = false;
 	dev->d2_support = false;
 	if (!pci_no_d1d2(dev)) {
-		if (pmc & PCI_PM_CAP_D1) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
+		if (pmc & PCI_PM_CAP_D1)
 			dev->d1_support = true;
-		}
-		if (pmc & PCI_PM_CAP_D2) {
-			dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
+		if (pmc & PCI_PM_CAP_D2)
 			dev->d2_support = true;
-		}
+
+		if (dev->d1_support || dev->d2_support)
+			dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
+				   dev->d1_support ? " D1": "",
+				   dev->d2_support ? " D2": "");
 	}
 
 	pmc &= PCI_PM_CAP_PME_MASK;
 	if (pmc) {
-		dev_printk(KERN_INFO, &dev->dev,
-			"PME# supported from%s%s%s%s%s\n",
-			(pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
-			(pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
-			(pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
-			(pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
-			(pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+		dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n",
+			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+			 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
 		dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
 		/*
 		 * Make device's PM flags reflect the wake-up capability, but
-- 
cgit v1.2.3-55-g7522


From c01156061bdd5976397dfb173f8c70ae351a6cb6 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Fri, 22 Aug 2008 09:53:39 +0200
Subject: PCI: Document that most pci options are shared between i386 and
 x86-64

Since the code is shared pretty much most of the pci= options are shared,
but kernel-parameters.txt marked most of them as i386 only.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f1544f67400..08fbc8372ba4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
 			Documentation/x86_64/boot-options.txt .
+	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
 
 In addition, the following text indicates that the option:
 
@@ -1588,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See also Documentation/paride.txt.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
-		off		[X86-32] don't probe for the PCI bus
+		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
 				has a non-standard PCI host bridge.
@@ -1596,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
 				hardware access methods are allowed. Use this
 				if you experience crashes upon bootup and you
 				suspect they are caused by the BIOS.
-		conf1		[X86-32] Force use of PCI Configuration
+		conf1		[X86] Force use of PCI Configuration
 				Mechanism 1.
-		conf2		[X86-32] Force use of PCI Configuration
+		conf2		[X86] Force use of PCI Configuration
 				Mechanism 2.
 		noaer		[PCIE] If the PCIEAER kernel config parameter is
 				enabled, this kernel boot option can be used to
@@ -1618,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
 				this option if the kernel is unable to allocate
 				IRQs or discover secondary PCI buses on your
 				motherboard.
-		rom		[X86-32] Assign address space to expansion ROMs.
+		rom		[X86] Assign address space to expansion ROMs.
 				Use with caution as certain devices share
 				address decoders between ROMs and other
 				resources.
-		norom		[X86-32,X86_64] Do not assign address space to
+		norom		[X86] Do not assign address space to
 				expansion ROMs that do not already have
 				BIOS assigned address ranges.
-		irqmask=0xMMMM	[X86-32] Set a bit mask of IRQs allowed to be
+		irqmask=0xMMMM	[X86] Set a bit mask of IRQs allowed to be
 				assigned automatically to PCI devices. You can
 				make the kernel exclude IRQs of your ISA cards
 				this way.
-		pirqaddr=0xAAAAA	[X86-32] Specify the physical address
+		pirqaddr=0xAAAAA	[X86] Specify the physical address
 				of the PIRQ table (normally generated
 				by the BIOS) if it is outside the
 				F0000h-100000h range.
-		lastbus=N	[X86-32] Scan all buses thru bus #N. Can be
+		lastbus=N	[X86] Scan all buses thru bus #N. Can be
 				useful if the kernel is unable to find your
 				secondary buses and you want to tell it
 				explicitly which ones they are.
-		assign-busses	[X86-32] Always assign all PCI bus
+		assign-busses	[X86] Always assign all PCI bus
 				numbers ourselves, overriding
 				whatever the firmware may have done.
-		usepirqmask	[X86-32] Honor the possible IRQ mask stored
+		usepirqmask	[X86] Honor the possible IRQ mask stored
 				in the BIOS $PIR table. This is needed on
 				some systems with broken BIOSes, notably
 				some HP Pavilion N5400 and Omnibook XE3
 				notebooks. This will have no effect if ACPI
 				IRQ routing is enabled.
-		noacpi		[X86-32] Do not use ACPI for IRQ routing
+		noacpi		[X86] Do not use ACPI for IRQ routing
 				or for PCI scanning.
-		use_crs		[X86-32] Use _CRS for PCI resource
+		use_crs		[X86] Use _CRS for PCI resource
 				allocation.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
-- 
cgit v1.2.3-55-g7522


From f7a10e32a1a7ae240fa3925c5727d224eba3e31d Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige
Date: Fri, 22 Aug 2008 17:16:48 +0900
Subject: PCI: pciehp: fix irq initialization

Current pciehp driver gets irq number from pci_dev->irq. But because
pciehp driver is a pci express port service driver, it should get irq
number from pcie_device->irq.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp.h     | 1 +
 drivers/pci/hotplug/pciehp_hpc.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9e6cec67e1cc..217427a0ead9 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -87,6 +87,7 @@ struct controller {
 	int num_slots;			/* Number of slots on ctlr */
 	int slot_num_inc;		/* 1 or -1 */
 	struct pci_dev *pci_dev;
+	struct pcie_device *pcie;	/* PCI Express port service */
 	struct list_head slot_list;
 	struct hpc_ops *hpc_ops;
 	wait_queue_head_t queue;	/* sleep & wake process */
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d934ddee956..5a5c08f12af2 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -223,7 +223,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
 
 static inline int pciehp_request_irq(struct controller *ctrl)
 {
-	int retval, irq = ctrl->pci_dev->irq;
+	int retval, irq = ctrl->pcie->irq;
 
 	/* Install interrupt polling timer. Start with 10 sec delay */
 	if (pciehp_poll_mode) {
@@ -244,7 +244,7 @@ static inline void pciehp_free_irq(struct controller *ctrl)
 	if (pciehp_poll_mode)
 		del_timer_sync(&ctrl->poll_timer);
 	else
-		free_irq(ctrl->pci_dev->irq, ctrl);
+		free_irq(ctrl->pcie->irq, ctrl);
 }
 
 static int pcie_poll_cmd(struct controller *ctrl)
@@ -1114,6 +1114,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 	}
 	INIT_LIST_HEAD(&ctrl->slot_list);
 
+	ctrl->pcie = dev;
 	ctrl->pci_dev = pdev;
 	ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!ctrl->cap_base) {
-- 
cgit v1.2.3-55-g7522


From 37a84ec668ba251ae02cf2c2c664baf6b247ae1f Mon Sep 17 00:00:00 2001
From: Seth Heasley
Date: Thu, 28 Aug 2008 15:40:59 -0700
Subject: x86/PCI: irq and pci_ids patch for Intel Ibex Peak DeviceIDs

This patch updates the Intel Ibex Peak (PCH) LPC and SMBus Controller
DeviceIDs.

The LPC Controller ID is set by Firmware within the range of
0x3b00-3b1f.  This range is included in pci_ids.h using min and max
values, and irq.c now has code to handle the range (in lieu of 32
additions to a SWITCH statement).

The SMBus Controller ID is a fixed-value and will not change.

Signed-off-by: Seth Heasley <seth.heasley@intel.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c      | 11 +++++++++--
 include/linux/pci_ids.h |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 52a1de1128c1..bf69dbe08bff 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
 	case PCI_DEVICE_ID_INTEL_ICH10_1:
 	case PCI_DEVICE_ID_INTEL_ICH10_2:
 	case PCI_DEVICE_ID_INTEL_ICH10_3:
-	case PCI_DEVICE_ID_INTEL_PCH_0:
-	case PCI_DEVICE_ID_INTEL_PCH_1:
 		r->name = "PIIX/ICH";
 		r->get = pirq_piix_get;
 		r->set = pirq_piix_set;
 		return 1;
 	}
+
+	if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && 
+		(device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+		r->name = "PIIX/ICH";
+		r->get = pirq_piix_get;
+		r->set = pirq_piix_set;
+		return 1;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 8edddc240e4f..e5d344bfcb7e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2454,9 +2454,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_3	0x3a1a
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
-#define PCI_DEVICE_ID_INTEL_PCH_0	0x3b10
-#define PCI_DEVICE_ID_INTEL_PCH_1	0x3b11
-#define PCI_DEVICE_ID_INTEL_PCH_2	0x3b30
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN	0x3b00
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX	0x3b1f
+#define PCI_DEVICE_ID_INTEL_PCH_SMBUS	0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
-- 
cgit v1.2.3-55-g7522


From 83e9ad540b9ee23919961f9500ca254220b78d09 Mon Sep 17 00:00:00 2001
From: Taku Izumi
Date: Fri, 5 Sep 2008 12:09:43 +0900
Subject: PCI: pciehp: change name tag of "hpdriver_portdrv" variable

I think an appropriate name tag of "hpdriver_portdrv" variable
is "pciehp" rather than "hpdriver".

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4fd5355bc3b5..3c8581e597c5 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -497,10 +497,9 @@ static struct pcie_port_service_id port_pci_ids[] = { {
 	.driver_data =	0,
 	}, { /* end: all zeroes */ }
 };
-static const char device_name[] = "hpdriver";
 
 static struct pcie_port_service_driver hpdriver_portdrv = {
-	.name		= (char *)device_name,
+	.name		= PCIE_MODULE_NAME,
 	.id_table	= &port_pci_ids[0],
 
 	.probe		= pciehp_probe,
-- 
cgit v1.2.3-55-g7522


From 7f2feec140f1f1e4f701e013a2bf8284a9ec2a3c Mon Sep 17 00:00:00 2001
From: Taku Izumi
Date: Fri, 5 Sep 2008 12:11:26 +0900
Subject: PCI: pciehp: replace printk with dev_printk

This patch replaces printks within pciehp module with dev_printks.

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp.h      |  15 ++-
 drivers/pci/hotplug/pciehp_core.c |  75 +++++++++------
 drivers/pci/hotplug/pciehp_ctrl.c | 136 ++++++++++++++------------
 drivers/pci/hotplug/pciehp_hpc.c  | 197 ++++++++++++++++++++++----------------
 drivers/pci/hotplug/pciehp_pci.c  |  26 ++---
 5 files changed, 263 insertions(+), 186 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 217427a0ead9..c367978bd7fe 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -57,6 +57,19 @@ extern struct workqueue_struct *pciehp_wq;
 #define warn(format, arg...)						\
 	printk(KERN_WARNING "%s: " format, MY_NAME , ## arg)
 
+#define ctrl_dbg(ctrl, format, arg...)					\
+	do {								\
+		if (pciehp_debug)					\
+			dev_printk(, &ctrl->pcie->device,		\
+					format, ## arg);		\
+	} while (0)
+#define ctrl_err(ctrl, format, arg...)					\
+	dev_err(&ctrl->pcie->device, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)					\
+	dev_info(&ctrl->pcie->device, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)					\
+	dev_warn(&ctrl->pcie->device, format, ## arg)
+
 #define SLOT_NAME_SIZE 10
 struct slot {
 	u8 bus;
@@ -171,7 +184,7 @@ static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
 			return slot;
 	}
 
-	err("%s: slot (device=0x%x) not found\n", __func__, device);
+	ctrl_err(ctrl, "%s: slot (device=0x%x) not found\n", __func__, device);
 	return NULL;
 }
 
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 3c8581e597c5..c748a19db89d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -144,9 +144,10 @@ set_lock_exit:
  * sysfs interface which allows the user to toggle the Electro Mechanical
  * Interlock.  Valid values are either 0 or 1.  0 == unlock, 1 == lock
  */
-static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
-		size_t count)
+static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
+		const char *buf, size_t count)
 {
+	struct slot *slot = hotplug_slot->private;
 	unsigned long llock;
 	u8 lock;
 	int retval = 0;
@@ -157,10 +158,11 @@ static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
 	switch (lock) {
 		case 0:
 		case 1:
-			retval = set_lock_status(slot, lock);
+			retval = set_lock_status(hotplug_slot, lock);
 			break;
 		default:
-			err ("%d is an invalid lock value\n", lock);
+			ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
+				 lock);
 			retval = -EINVAL;
 	}
 	if (retval)
@@ -180,7 +182,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
  */
 static void release_slot(struct hotplug_slot *hotplug_slot)
 {
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	struct slot *slot = hotplug_slot->private;
+
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	kfree(hotplug_slot->info);
 	kfree(hotplug_slot);
@@ -215,9 +220,9 @@ static int init_slots(struct controller *ctrl)
 		get_adapter_status(hotplug_slot, &info->adapter_status);
 		slot->hotplug_slot = hotplug_slot;
 
-		dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
-		    "slot_device_offset=%x\n", slot->bus, slot->device,
-		    slot->hp_slot, slot->number, ctrl->slot_device_offset);
+		ctrl_dbg(ctrl, "Registering bus=%x dev=%x hp_slot=%x sun=%x "
+			 "slot_device_offset=%x\n", slot->bus, slot->device,
+			 slot->hp_slot, slot->number, ctrl->slot_device_offset);
 duplicate_name:
 		retval = pci_hp_register(hotplug_slot,
 					 ctrl->pci_dev->subordinate,
@@ -233,9 +238,11 @@ duplicate_name:
 				if (len < SLOT_NAME_SIZE)
 					goto duplicate_name;
 				else
-					err("duplicate slot name overflow\n");
+					ctrl_err(ctrl, "duplicate slot name "
+						 "overflow\n");
 			}
-			err("pci_hp_register failed with error %d\n", retval);
+			ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
+				 retval);
 			goto error_info;
 		}
 		/* create additional sysfs entries */
@@ -244,7 +251,8 @@ duplicate_name:
 				&hotplug_slot_attr_lock.attr);
 			if (retval) {
 				pci_hp_deregister(hotplug_slot);
-				err("cannot create additional sysfs entries\n");
+				ctrl_err(ctrl, "cannot create additional sysfs "
+					 "entries\n");
 				goto error_info;
 			}
 		}
@@ -278,7 +286,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	hotplug_slot->info->attention_status = status;
 
@@ -293,7 +302,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_enable_slot(slot);
 }
@@ -303,7 +313,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
 	struct slot *slot = hotplug_slot->private;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	return pciehp_sysfs_disable_slot(slot);
 }
@@ -313,7 +324,8 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_power_status(slot, value);
 	if (retval < 0)
@@ -327,7 +339,8 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		  __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_attention_status(slot, value);
 	if (retval < 0)
@@ -341,7 +354,8 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_latch_status(slot, value);
 	if (retval < 0)
@@ -355,7 +369,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_adapter_status(slot, value);
 	if (retval < 0)
@@ -370,7 +385,8 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_max_bus_speed(slot, value);
 	if (retval < 0)
@@ -384,7 +400,8 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	struct slot *slot = hotplug_slot->private;
 	int retval;
 
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+		 __func__, hotplug_slot->name);
 
 	retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
 	if (retval < 0)
@@ -402,14 +419,15 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	struct pci_dev *pdev = dev->port;
 
 	if (pciehp_force)
-		dbg("Bypassing BIOS check for pciehp use on %s\n",
-		    pci_name(pdev));
+		dev_info(&dev->device,
+			 "Bypassing BIOS check for pciehp use on %s\n",
+			 pci_name(pdev));
 	else if (pciehp_get_hp_hw_control_from_firmware(pdev))
 		goto err_out_none;
 
 	ctrl = pcie_init(dev);
 	if (!ctrl) {
-		dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME);
+		dev_err(&dev->device, "controller initialization failed\n");
 		goto err_out_none;
 	}
 	set_service_data(dev, ctrl);
@@ -418,11 +436,10 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 	rc = init_slots(ctrl);
 	if (rc) {
 		if (rc == -EBUSY)
-			warn("%s: slot already registered by another "
-				"hotplug driver\n", PCIE_MODULE_NAME);
+			ctrl_warn(ctrl, "slot already registered by another "
+				  "hotplug driver\n");
 		else
-			err("%s: slot initialization failed\n",
-				PCIE_MODULE_NAME);
+			ctrl_err(ctrl, "slot initialization failed\n");
 		goto err_out_release_ctlr;
 	}
 
@@ -461,13 +478,13 @@ static void pciehp_remove (struct pcie_device *dev)
 #ifdef CONFIG_PM
 static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	return 0;
 }
 
 static int pciehp_resume (struct pcie_device *dev)
 {
-	printk("%s ENTRY\n", __func__);
+	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	if (pciehp_force) {
 		struct controller *ctrl = get_service_data(dev);
 		struct slot *t_slot;
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 96a5d55a4983..acb7f9efd182 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -58,14 +58,15 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 u8 pciehp_handle_attention_button(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Attention Button Change */
-	dbg("pciehp:  Attention button interrupt received.\n");
+	ctrl_dbg(ctrl, "Attention button interrupt received.\n");
 
 	/*
 	 *  Button pressed - See if need to TAKE ACTION!!!
 	 */
-	info("Button pressed on Slot(%s)\n", p_slot->name);
+	ctrl_info(ctrl, "Button pressed on Slot(%s)\n", p_slot->name);
 	event_type = INT_BUTTON_PRESS;
 
 	queue_interrupt_event(p_slot, event_type);
@@ -77,22 +78,23 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
 {
 	u8 getstatus;
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Switch Change */
-	dbg("pciehp:  Switch interrupt received.\n");
+	ctrl_dbg(ctrl, "Switch interrupt received.\n");
 
 	p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 	if (getstatus) {
 		/*
 		 * Switch opened
 		 */
-		info("Latch open on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch open on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_OPEN;
 	} else {
 		/*
 		 *  Switch closed
 		 */
-		info("Latch close on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Latch close on Slot(%s)\n", p_slot->name);
 		event_type = INT_SWITCH_CLOSE;
 	}
 
@@ -105,9 +107,10 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 {
 	u32 event_type;
 	u8 presence_save;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Presence Change */
-	dbg("pciehp:  Presence/Notify input change.\n");
+	ctrl_dbg(ctrl, "Presence/Notify input change.\n");
 
 	/* Switch is open, assume a presence change
 	 * Save the presence state
@@ -117,13 +120,13 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 		/*
 		 * Card Present
 		 */
-		info("Card present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_ON;
 	} else {
 		/*
 		 * Not Present
 		 */
-		info("Card not present on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Card not present on Slot(%s)\n", p_slot->name);
 		event_type = INT_PRESENCE_OFF;
 	}
 
@@ -135,23 +138,25 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 u8 pciehp_handle_power_fault(struct slot *p_slot)
 {
 	u32 event_type;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* power fault */
-	dbg("pciehp:  Power fault interrupt received.\n");
+	ctrl_dbg(ctrl, "Power fault interrupt received.\n");
 
 	if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
 		/*
 		 * power fault Cleared
 		 */
-		info("Power fault cleared on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
+			  p_slot->name);
 		event_type = INT_POWER_FAULT_CLEAR;
 	} else {
 		/*
 		 *   power fault
 		 */
-		info("Power fault on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Power fault on Slot(%s)\n", p_slot->name);
 		event_type = INT_POWER_FAULT;
-		info("power fault bit %x set\n", 0);
+		ctrl_info(ctrl, "power fault bit %x set\n", 0);
 	}
 
 	queue_interrupt_event(p_slot, event_type);
@@ -168,8 +173,9 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 	/* turn off slot, turn on Amber LED, turn off Green LED if supported*/
 	if (POWER_CTRL(ctrl)) {
 		if (pslot->hpc_ops->power_off_slot(pslot)) {
-			err("%s: Issue of Slot Power Off command failed\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Issue of Slot Power Off command failed\n",
+				 __func__);
 			return;
 		}
 	}
@@ -186,8 +192,8 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 
 	if (ATTN_LED(ctrl)) {
 		if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
-			err("%s: Issue of Set Attention Led command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Set Attention "
+				 "Led command failed\n", __func__);
 			return;
 		}
 	}
@@ -205,9 +211,9 @@ static int board_added(struct slot *p_slot)
 	int retval = 0;
 	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
-			__func__, p_slot->device,
-			ctrl->slot_device_offset, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
+		 __func__, p_slot->device, ctrl->slot_device_offset,
+		 p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* Power on slot */
@@ -225,22 +231,22 @@ static int board_added(struct slot *p_slot)
 	/* Check link training status */
 	retval = p_slot->hpc_ops->check_lnk_status(ctrl);
 	if (retval) {
-		err("%s: Failed to check link status\n", __func__);
+		ctrl_err(ctrl, "%s: Failed to check link status\n", __func__);
 		set_slot_off(ctrl, p_slot);
 		return retval;
 	}
 
 	/* Check for a power fault */
 	if (p_slot->hpc_ops->query_power_fault(p_slot)) {
-		dbg("%s: power fault detected\n", __func__);
+		ctrl_dbg(ctrl, "%s: power fault detected\n", __func__);
 		retval = POWER_FAILURE;
 		goto err_exit;
 	}
 
 	retval = pciehp_configure_device(p_slot);
 	if (retval) {
-		err("Cannot add device 0x%x:%x\n", p_slot->bus,
-		    p_slot->device);
+		ctrl_err(ctrl, "Cannot add device 0x%x:%x\n",
+			 p_slot->bus, p_slot->device);
 		goto err_exit;
 	}
 
@@ -272,14 +278,14 @@ static int remove_board(struct slot *p_slot)
 	if (retval)
 		return retval;
 
-	dbg("In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
+	ctrl_dbg(ctrl, "In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
 
 	if (POWER_CTRL(ctrl)) {
 		/* power off slot */
 		retval = p_slot->hpc_ops->power_off_slot(p_slot);
 		if (retval) {
-			err("%s: Issue of Slot Disable command failed\n",
-			    __func__);
+			ctrl_err(ctrl, "%s: Issue of Slot Disable command "
+				 "failed\n", __func__);
 			return retval;
 		}
 	}
@@ -320,8 +326,8 @@ static void pciehp_power_thread(struct work_struct *work)
 	switch (p_slot->state) {
 	case POWEROFF_STATE:
 		mutex_unlock(&p_slot->lock);
-		dbg("%s: disabling bus:device(%x:%x)\n",
-		    __func__, p_slot->bus, p_slot->device);
+		ctrl_dbg(p_slot->ctrl, "%s: disabling bus:device(%x:%x)\n",
+			 __func__, p_slot->bus, p_slot->device);
 		pciehp_disable_slot(p_slot);
 		mutex_lock(&p_slot->lock);
 		p_slot->state = STATIC_STATE;
@@ -349,7 +355,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -403,12 +410,14 @@ static void handle_button_press_event(struct slot *p_slot)
 		p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (getstatus) {
 			p_slot->state = BLINKINGOFF_STATE;
-			info("PCI slot #%s - powering off due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering off due to button "
+				  "press.\n", p_slot->name);
 		} else {
 			p_slot->state = BLINKINGON_STATE;
-			info("PCI slot #%s - powering on due to button "
-			     "press.\n", p_slot->name);
+			ctrl_info(ctrl,
+				  "PCI slot #%s - powering on due to button "
+				  "press.\n", p_slot->name);
 		}
 		/* blink green LED and turn off amber */
 		if (PWR_LED(ctrl))
@@ -425,8 +434,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * press the attention again before the 5 sec. limit
 		 * expires to cancel hot-add or hot-remove
 		 */
-		info("Button cancel on Slot(%s)\n", p_slot->name);
-		dbg("%s: button cancel\n", __func__);
+		ctrl_info(ctrl, "Button cancel on Slot(%s)\n", p_slot->name);
+		ctrl_dbg(ctrl, "%s: button cancel\n", __func__);
 		cancel_delayed_work(&p_slot->work);
 		if (p_slot->state == BLINKINGOFF_STATE) {
 			if (PWR_LED(ctrl))
@@ -437,8 +446,8 @@ static void handle_button_press_event(struct slot *p_slot)
 		}
 		if (ATTN_LED(ctrl))
 			p_slot->hpc_ops->set_attention_status(p_slot, 0);
-		info("PCI slot #%s - action canceled due to button press\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "PCI slot #%s - action canceled "
+			  "due to button press\n", p_slot->name);
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
@@ -448,11 +457,11 @@ static void handle_button_press_event(struct slot *p_slot)
 		 * this means that the previous attention button action
 		 * to hot-add or hot-remove is undergoing
 		 */
-		info("Button ignore on Slot(%s)\n", p_slot->name);
+		ctrl_info(ctrl, "Button ignore on Slot(%s)\n", p_slot->name);
 		update_slot_info(p_slot);
 		break;
 	default:
-		warn("Not a valid state\n");
+		ctrl_warn(ctrl, "Not a valid state\n");
 		break;
 	}
 }
@@ -467,7 +476,8 @@ static void handle_surprise_event(struct slot *p_slot)
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
-		err("%s: Cannot allocate memory\n", __func__);
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
 		return;
 	}
 	info->p_slot = p_slot;
@@ -505,7 +515,7 @@ static void interrupt_event_handler(struct work_struct *work)
 	case INT_PRESENCE_OFF:
 		if (!HP_SUPR_RM(ctrl))
 			break;
-		dbg("Surprise Removal\n");
+		ctrl_dbg(ctrl, "Surprise Removal\n");
 		update_slot_info(p_slot);
 		handle_surprise_event(p_slot);
 		break;
@@ -522,22 +532,23 @@ int pciehp_enable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int rc;
+	struct controller *ctrl = p_slot->ctrl;
 
 	/* Check to see if (latch closed, card present, power off) */
 	mutex_lock(&p_slot->ctrl->crit_sect);
 
 	rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 	if (rc || !getstatus) {
-		info("%s: no adapter on slot(%s)\n", __func__,
-		     p_slot->name);
+		ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+			  __func__, p_slot->name);
 		mutex_unlock(&p_slot->ctrl->crit_sect);
 		return -ENODEV;
 	}
 	if (MRL_SENS(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -546,8 +557,8 @@ int pciehp_enable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (rc || getstatus) {
-			info("%s: already enabled on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -571,6 +582,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 {
 	u8 getstatus = 0;
 	int ret = 0;
+	struct controller *ctrl = p_slot->ctrl;
 
 	if (!p_slot->ctrl)
 		return 1;
@@ -581,8 +593,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (!HP_SUPR_RM(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: no adapter on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -591,8 +603,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (MRL_SENS(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
 		if (ret || getstatus) {
-			info("%s: latch open on slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -ENODEV;
 		}
@@ -601,8 +613,8 @@ int pciehp_disable_slot(struct slot *p_slot)
 	if (POWER_CTRL(p_slot->ctrl)) {
 		ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
 		if (ret || !getstatus) {
-			info("%s: already disabled slot(%s)\n", __func__,
-			     p_slot->name);
+			ctrl_info(ctrl, "%s: already disabled slot(%s)\n",
+				  __func__, p_slot->name);
 			mutex_unlock(&p_slot->ctrl->crit_sect);
 			return -EINVAL;
 		}
@@ -618,6 +630,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 int pciehp_sysfs_enable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -631,15 +644,15 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWERON_STATE:
-		info("Slot %s is already in powering on state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering on state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGOFF_STATE:
 	case POWEROFF_STATE:
-		info("Already enabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already enabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
@@ -650,6 +663,7 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 int pciehp_sysfs_disable_slot(struct slot *p_slot)
 {
 	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -663,15 +677,15 @@ int pciehp_sysfs_disable_slot(struct slot *p_slot)
 		p_slot->state = STATIC_STATE;
 		break;
 	case POWEROFF_STATE:
-		info("Slot %s is already in powering off state\n",
-		     p_slot->name);
+		ctrl_info(ctrl, "Slot %s is already in powering off state\n",
+			  p_slot->name);
 		break;
 	case BLINKINGON_STATE:
 	case POWERON_STATE:
-		info("Already disabled on slot %s\n", p_slot->name);
+		ctrl_info(ctrl, "Already disabled on slot %s\n", p_slot->name);
 		break;
 	default:
-		err("Not a valid state on slot %s\n", p_slot->name);
+		ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
 		break;
 	}
 	mutex_unlock(&p_slot->lock);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 5a5c08f12af2..8e9530c4c36d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -235,7 +235,8 @@ static inline int pciehp_request_irq(struct controller *ctrl)
 	/* Installs the interrupt handler */
 	retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
 	if (retval)
-		err("Cannot get irq %d for the hotplug controller\n", irq);
+		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
+			 irq);
 	return retval;
 }
 
@@ -282,7 +283,7 @@ static void pcie_wait_cmd(struct controller *ctrl, int poll)
 	else
 		rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
 	if (!rc)
-		dbg("Command not completed in 1000 msec\n");
+		ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
 }
 
 /**
@@ -301,7 +302,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		goto out;
 	}
 
@@ -312,26 +314,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 			 * proceed forward to issue the next command according
 			 * to spec. Just print out the error message.
 			 */
-			dbg("%s: CMD_COMPLETED not clear after 1 sec.\n",
-			    __func__);
+			ctrl_dbg(ctrl,
+				 "%s: CMD_COMPLETED not clear after 1 sec.\n",
+				 __func__);
 		} else if (!NO_CMD_CMPL(ctrl)) {
 			/*
 			 * This controller semms to notify of command completed
 			 * event even though it supports none of power
 			 * controller, attention led, power led and EMI.
 			 */
-			dbg("%s: Unexpected CMD_COMPLETED. Need to wait for "
-			    "command completed event.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Need to "
+				 "wait for command completed event.\n",
+				 __func__);
 			ctrl->no_cmd_complete = 0;
 		} else {
-			dbg("%s: Unexpected CMD_COMPLETED. Maybe the "
-			    "controller is broken.\n", __func__);
+			ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Maybe "
+				 "the controller is broken.\n", __func__);
 		}
 	}
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		goto out;
 	}
 
@@ -341,7 +345,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 	smp_mb();
 	retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
 	if (retval)
-		err("%s: Cannot write to SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot write to SLOTCTRL register\n",
+			 __func__);
 
 	/*
 	 * Wait for command completion.
@@ -370,14 +375,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
-	dbg("%s: lnk_status = %x\n", __func__, lnk_status);
+	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
 	if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) ||
 		!(lnk_status & NEG_LINK_WD)) {
-		err("%s : Link Training Error occurs \n", __func__);
+		ctrl_err(ctrl, "%s : Link Training Error occurs \n", __func__);
 		retval = -1;
 		return retval;
 	}
@@ -394,12 +400,12 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
 
-	dbg("%s: SLOTCTRL %x, value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
 
@@ -433,11 +439,11 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
 	if (retval) {
-		err("%s: Cannot read SLOTCTRL register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
-	dbg("%s: SLOTCTRL %x value read %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
 	pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
 
@@ -464,7 +470,8 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -482,7 +489,8 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	card_state = (u8)((slot_status & PRSN_STATE) >> 6);
@@ -500,7 +508,7 @@ static int hpc_query_power_fault(struct slot *slot)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot check for power fault\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot check for power fault\n", __func__);
 		return retval;
 	}
 	pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
@@ -516,7 +524,7 @@ static int hpc_get_emi_status(struct slot *slot, u8 *status)
 
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s : Cannot check EMI status\n", __func__);
+		ctrl_err(ctrl, "%s : Cannot check EMI status\n", __func__);
 		return retval;
 	}
 	*status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
@@ -560,8 +568,8 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
 			return -1;
 	}
 	rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return rc;
 }
@@ -575,8 +583,8 @@ static void hpc_set_green_led_on(struct slot *slot)
 	slot_cmd = 0x0100;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_off(struct slot *slot)
@@ -588,8 +596,8 @@ static void hpc_set_green_led_off(struct slot *slot)
 	slot_cmd = 0x0300;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_blink(struct slot *slot)
@@ -601,8 +609,8 @@ static void hpc_set_green_led_blink(struct slot *slot)
 	slot_cmd = 0x0200;
 	cmd_mask = PWR_LED_CTRL;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -613,20 +621,22 @@ static int hpc_power_on_slot(struct slot * slot)
 	u16 slot_status;
 	int retval = 0;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/* Clear sticky power-fault bit from previous power failures */
 	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
 	if (retval) {
-		err("%s: Cannot read SLOTSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 	slot_status &= PWR_FAULT_DETECTED;
 	if (slot_status) {
 		retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
 		if (retval) {
-			err("%s: Cannot write to SLOTSTATUS register\n",
-			    __func__);
+			ctrl_err(ctrl,
+				 "%s: Cannot write to SLOTSTATUS register\n",
+				 __func__);
 			return retval;
 		}
 	}
@@ -644,11 +654,12 @@ static int hpc_power_on_slot(struct slot * slot)
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 
 	if (retval) {
-		err("%s: Write %x command failed!\n", __func__, slot_cmd);
+		ctrl_err(ctrl, "%s: Write %x command failed!\n",
+			 __func__, slot_cmd);
 		return -1;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
 	return retval;
 }
@@ -694,7 +705,7 @@ static int hpc_power_off_slot(struct slot * slot)
 	int retval = 0;
 	int changed;
 
-	dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/*
 	 * Set Bad DLLP Mask bit in Correctable Error Mask
@@ -722,12 +733,12 @@ static int hpc_power_off_slot(struct slot * slot)
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	if (retval) {
-		err("%s: Write command failed!\n", __func__);
+		ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
 		retval = -1;
 		goto out;
 	}
-	dbg("%s: SLOTCTRL %x write cmd %x\n",
-	    __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
  out:
 	if (changed)
 		pcie_unmask_bad_dllp(ctrl);
@@ -749,7 +760,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	intr_loc = 0;
 	do {
 		if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
-			err("%s: Cannot read SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 
@@ -760,12 +772,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		if (!intr_loc)
 			return IRQ_NONE;
 		if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
-			err("%s: Cannot write to SLOTSTATUS\n", __func__);
+			ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
+				 __func__);
 			return IRQ_NONE;
 		}
 	} while (detected);
 
-	dbg("%s: intr_loc %x\n", __FUNCTION__, intr_loc);
+	ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
 
 	/* Check Command Complete Interrupt Pending */
 	if (intr_loc & CMD_COMPLETED) {
@@ -807,7 +820,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -821,7 +834,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Max link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -836,7 +849,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 
 	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
 	if (retval) {
-		err("%s: Cannot read LNKCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
@@ -871,7 +884,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Max link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Max link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -885,7 +898,8 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -899,7 +913,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	}
 
 	*value = lnk_speed;
-	dbg("Current link speed = %d\n", lnk_speed);
+	ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
 
 	return retval;
 }
@@ -914,7 +928,8 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 
 	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
 	if (retval) {
-		err("%s: Cannot read LNKSTATUS register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+			 __func__);
 		return retval;
 	}
 
@@ -949,7 +964,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 	}
 
 	*value = lnk_wdth;
-	dbg("Current link width = %d\n", lnk_wdth);
+	ctrl_dbg(ctrl, "Current link width = %d\n", lnk_wdth);
 
 	return retval;
 }
@@ -998,7 +1013,8 @@ int pcie_enable_notification(struct controller *ctrl)
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 
 	if (pcie_write_cmd(ctrl, cmd, mask)) {
-		err("%s: Cannot enable software notification\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot enable software notification\n",
+			 __func__);
 		return -1;
 	}
 	return 0;
@@ -1010,7 +1026,8 @@ static void pcie_disable_notification(struct controller *ctrl)
 	mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
 	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 	if (pcie_write_cmd(ctrl, 0, mask))
-		warn("%s: Cannot disable software notification\n", __func__);
+		ctrl_warn(ctrl, "%s: Cannot disable software notification\n",
+			  __func__);
 }
 
 static int pcie_init_notification(struct controller *ctrl)
@@ -1071,34 +1088,45 @@ static inline void dbg_ctrl(struct controller *ctrl)
 	if (!pciehp_debug)
 		return;
 
-	dbg("Hotplug Controller:\n");
-	dbg("  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n", pci_name(pdev), pdev->irq);
-	dbg("  Vendor ID            : 0x%04x\n", pdev->vendor);
-	dbg("  Device ID            : 0x%04x\n", pdev->device);
-	dbg("  Subsystem ID         : 0x%04x\n", pdev->subsystem_device);
-	dbg("  Subsystem Vendor ID  : 0x%04x\n", pdev->subsystem_vendor);
-	dbg("  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
+	ctrl_info(ctrl, "Hotplug Controller:\n");
+	ctrl_info(ctrl, "  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n",
+		  pci_name(pdev), pdev->irq);
+	ctrl_info(ctrl, "  Vendor ID            : 0x%04x\n", pdev->vendor);
+	ctrl_info(ctrl, "  Device ID            : 0x%04x\n", pdev->device);
+	ctrl_info(ctrl, "  Subsystem ID         : 0x%04x\n",
+		  pdev->subsystem_device);
+	ctrl_info(ctrl, "  Subsystem Vendor ID  : 0x%04x\n",
+		  pdev->subsystem_vendor);
+	ctrl_info(ctrl, "  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		if (!pci_resource_len(pdev, i))
 			continue;
-		dbg("  PCI resource [%d]     : 0x%llx@0x%llx\n", i,
-		    (unsigned long long)pci_resource_len(pdev, i),
-		    (unsigned long long)pci_resource_start(pdev, i));
+		ctrl_info(ctrl, "  PCI resource [%d]     : 0x%llx@0x%llx\n",
+			  i, (unsigned long long)pci_resource_len(pdev, i),
+			  (unsigned long long)pci_resource_start(pdev, i));
 	}
-	dbg("Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-	dbg("  Physical Slot Number : %d\n", ctrl->first_slot);
-	dbg("  Attention Button     : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no");
-	dbg("  Power Controller     : %3s\n", POWER_CTRL(ctrl) ? "yes" : "no");
-	dbg("  MRL Sensor           : %3s\n", MRL_SENS(ctrl)   ? "yes" : "no");
-	dbg("  Attention Indicator  : %3s\n", ATTN_LED(ctrl)   ? "yes" : "no");
-	dbg("  Power Indicator      : %3s\n", PWR_LED(ctrl)    ? "yes" : "no");
-	dbg("  Hot-Plug Surprise    : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
-	dbg("  EMI Present          : %3s\n", EMI(ctrl)        ? "yes" : "no");
-	dbg("  Command Completed    : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+	ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
+	ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+	ctrl_info(ctrl, "  Attention Button     : %3s\n",
+		  ATTN_BUTTN(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Controller     : %3s\n",
+		  POWER_CTRL(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  MRL Sensor           : %3s\n",
+		  MRL_SENS(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Attention Indicator  : %3s\n",
+		  ATTN_LED(ctrl)   ? "yes" : "no");
+	ctrl_info(ctrl, "  Power Indicator      : %3s\n",
+		  PWR_LED(ctrl)    ? "yes" : "no");
+	ctrl_info(ctrl, "  Hot-Plug Surprise    : %3s\n",
+		  HP_SUPR_RM(ctrl) ? "yes" : "no");
+	ctrl_info(ctrl, "  EMI Present          : %3s\n",
+		  EMI(ctrl)        ? "yes" : "no");
+	ctrl_info(ctrl, "  Command Completed    : %3s\n",
+		  NO_CMD_CMPL(ctrl) ? "no" : "yes");
 	pciehp_readw(ctrl, SLOTSTATUS, &reg16);
-	dbg("Slot Status            : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Status            : 0x%04x\n", reg16);
 	pciehp_readw(ctrl, SLOTCTRL, &reg16);
-	dbg("Slot Control           : 0x%04x\n", reg16);
+	ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
 struct controller *pcie_init(struct pcie_device *dev)
@@ -1109,7 +1137,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
-		err("%s : out of memory\n", __func__);
+		dev_err(&dev->device, "%s : out of memory\n", __func__);
 		goto abort;
 	}
 	INIT_LIST_HEAD(&ctrl->slot_list);
@@ -1118,11 +1146,12 @@ struct controller *pcie_init(struct pcie_device *dev)
 	ctrl->pci_dev = pdev;
 	ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!ctrl->cap_base) {
-		err("%s: Cannot find PCI Express capability\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot find PCI Express capability\n",
+			 __func__);
 		goto abort;
 	}
 	if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
-		err("%s: Cannot read SLOTCAP register\n", __func__);
+		ctrl_err(ctrl, "%s: Cannot read SLOTCAP register\n", __func__);
 		goto abort;
 	}
 
@@ -1162,9 +1191,9 @@ struct controller *pcie_init(struct pcie_device *dev)
 			goto abort_ctrl;
 	}
 
-	info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
-	     pdev->vendor, pdev->device,
-	     pdev->subsystem_vendor, pdev->subsystem_device);
+	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+		  pdev->vendor, pdev->device, pdev->subsystem_vendor,
+		  pdev->subsystem_device);
 
 	if (pcie_init_slot(ctrl))
 		goto abort_ctrl;
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 6040dcceb256..ffd11148fbe2 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -198,18 +198,20 @@ int pciehp_configure_device(struct slot *p_slot)
 	struct pci_dev *dev;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	int num, fn;
+	struct controller *ctrl = p_slot->ctrl;
 
 	dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (dev) {
-		err("Device %s already exists at %x:%x, cannot hot-add\n",
-				pci_name(dev), p_slot->bus, p_slot->device);
+		ctrl_err(ctrl,
+			 "Device %s already exists at %x:%x, cannot hot-add\n",
+			 pci_name(dev), p_slot->bus, p_slot->device);
 		pci_dev_put(dev);
 		return -EINVAL;
 	}
 
 	num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
 	if (num == 0) {
-		err("No new device found\n");
+		ctrl_err(ctrl, "No new device found\n");
 		return -ENODEV;
 	}
 
@@ -218,8 +220,8 @@ int pciehp_configure_device(struct slot *p_slot)
 		if (!dev)
 			continue;
 		if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot hot-add display device %s\n",
-					pci_name(dev));
+			ctrl_err(ctrl, "Cannot hot-add display device %s\n",
+				 pci_name(dev));
 			pci_dev_put(dev);
 			continue;
 		}
@@ -244,9 +246,10 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 	u8 presence = 0;
 	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
 	u16 command;
+	struct controller *ctrl = p_slot->ctrl;
 
-	dbg("%s: bus/dev = %x/%x\n", __func__, p_slot->bus,
-				p_slot->device);
+	ctrl_dbg(ctrl, "%s: bus/dev = %x/%x\n", __func__,
+		 p_slot->bus, p_slot->device);
 	ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
 	if (ret)
 		presence = 0;
@@ -257,16 +260,17 @@ int pciehp_unconfigure_device(struct slot *p_slot)
 		if (!temp)
 			continue;
 		if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-			err("Cannot remove display device %s\n",
-					pci_name(temp));
+			ctrl_err(ctrl, "Cannot remove display device %s\n",
+				 pci_name(temp));
 			pci_dev_put(temp);
 			continue;
 		}
 		if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
 			pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl);
 			if (bctl & PCI_BRIDGE_CTL_VGA) {
-				err("Cannot remove display device %s\n",
-				    pci_name(temp));
+				ctrl_err(ctrl,
+					 "Cannot remove display device %s\n",
+					 pci_name(temp));
 				pci_dev_put(temp);
 				continue;
 			}
-- 
cgit v1.2.3-55-g7522


From 5993760f7fc75b77e4701f1e56dc84c0d6cf18d5 Mon Sep 17 00:00:00 2001
From: Jike Song
Date: Tue, 9 Sep 2008 23:42:03 +0800
Subject: PCI: utilize calculated results when detecting MSI features

In msi_capability_init, we can make use of the calculated results
instead of calling is_mask_bit_support and is_64bit_address twice.

Signed-off-by: Jike Song <albcamus@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/msi.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4a10b5624f72..d2812013fd22 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -378,23 +378,21 @@ static int msi_capability_init(struct pci_dev *dev)
 	entry->msi_attrib.masked = 1;
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.pos = pos;
-	if (is_mask_bit_support(control)) {
+	if (entry->msi_attrib.maskbit) {
 		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
-				is_64bit_address(control));
+				entry->msi_attrib.is_64);
 	}
 	entry->dev = dev;
 	if (entry->msi_attrib.maskbit) {
 		unsigned int maskbits, temp;
 		/* All MSIs are unmasked by default, Mask them all */
 		pci_read_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
+			msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
 			&maskbits);
 		temp = (1 << multi_msi_capable(control));
 		temp = ((temp - 1) & ~temp);
 		maskbits |= temp;
-		pci_write_config_dword(dev,
-			msi_mask_bits_reg(pos, is_64bit_address(control)),
-			maskbits);
+		pci_write_config_dword(dev, entry->msi_attrib.is_64, maskbits);
 		entry->msi_attrib.maskbits_mask = temp;
 	}
 	list_add_tail(&entry->list, &dev->msi_list);
-- 
cgit v1.2.3-55-g7522


From 93ff68a55aa92180a765d6c51c3303f6200167a6 Mon Sep 17 00:00:00 2001
From: Mike Travis
Date: Sat, 6 Sep 2008 05:46:42 -0700
Subject: PCI: make CPU list affinity visible

Stephen Hemminger wrote:
> Looks like Mike created cpulistaffinty in sysfs but never completed
> the job.

This patch hooks things up correctly, taking care to remove the new file
when the bus is destroyed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/bus.c    | 7 +++++++
 drivers/pci/pci.h    | 1 +
 drivers/pci/remove.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 529d9d7727b0..999cc4088b59 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -151,6 +151,13 @@ void pci_bus_add_devices(struct pci_bus *bus)
 			if (retval)
 				dev_err(&dev->dev, "Error creating cpuaffinity"
 					" file, continuing...\n");
+
+			retval = device_create_file(&child_bus->dev,
+						&dev_attr_cpulistaffinity);
+			if (retval)
+				dev_err(&dev->dev,
+					"Error creating cpulistaffinity"
+					" file, continuing...\n");
 		}
 	}
 }
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d807cd786f20..4723b12fb39a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -109,6 +109,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 extern int pcie_mch_quirk;
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute dev_attr_cpuaffinity;
+extern struct device_attribute dev_attr_cpulistaffinity;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index bdc2a44d68e1..f94f6d5ae297 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,6 +73,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)
 	up_write(&pci_bus_sem);
 	pci_remove_legacy_files(pci_bus);
 	device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
+	device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
 	device_unregister(&pci_bus->dev);
 }
 EXPORT_SYMBOL(pci_remove_bus);
-- 
cgit v1.2.3-55-g7522


From cef354db0d7a7207ea78c716753d9216a9c2b7e1 Mon Sep 17 00:00:00 2001
From: Alex Chiang
Date: Tue, 2 Sep 2008 09:40:51 -0600
Subject: PCI: connect struct pci_dev to struct pci_slot

The introduction of struct pci_slot (f46753c5e354b857b20ab8e0fe7b25)
added a struct pci_slot pointer to struct pci_dev, but we forgot to
associate the two.

Connect the two structs together; the interesting portions of the object
lifetimes are:

	- when a new pci_slot is created, connect it to the appropriate
	  pci_dev's. A single pci_slot may be associated with multiple
	  pci_dev's, e.g. any multi-function PCI device.

	- when a pci_slot is released, look for all the pci_dev's it was
	  associated with, and set their pci_slot pointers to NULL

	- when a pci_dev is created, look for slots to associate with.

Note -- when a pci_dev is released, we don't need to do any bookkeeping,
since pci_slot's do not have pointers to pci_dev's.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c |  5 +++++
 drivers/pci/slot.c  | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 578d15f49e02..7aa71636dd3c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -949,6 +949,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 {
 	struct pci_dev *dev;
+	struct pci_slot *slot;
 	u32 l;
 	u8 hdr_type;
 	int delay = 1;
@@ -997,6 +998,10 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
 
+	list_for_each_entry(slot, &bus->slots, list)
+		if (PCI_SLOT(devfn) == slot->number)
+			dev->slot = slot;
+
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
 	dev->dma_mask = 0xffffffff;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 7e5b85cbd948..0c6db03698ea 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -49,11 +49,16 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 
 static void pci_slot_release(struct kobject *kobj)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot = to_pci_slot(kobj);
 
 	pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
 		 slot->bus->number, slot->number);
 
+	list_for_each_entry(dev, &slot->bus->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = NULL;
+
 	list_del(&slot->list);
 
 	kfree(slot);
@@ -108,6 +113,7 @@ static struct kobj_type pci_slot_ktype = {
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name)
 {
+	struct pci_dev *dev;
 	struct pci_slot *slot;
 	int err;
 
@@ -150,6 +156,10 @@ placeholder:
 	INIT_LIST_HEAD(&slot->list);
 	list_add(&slot->list, &parent->slots);
 
+	list_for_each_entry(dev, &parent->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot_nr)
+			dev->slot = slot;
+
 	/* Don't care if debug printk has a -1 for slot_nr */
 	pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
 		 __func__, pci_domain_nr(parent), parent->number, slot_nr);
-- 
cgit v1.2.3-55-g7522


From ec84f1268fcf16c4a852fdb38b3a541748644918 Mon Sep 17 00:00:00 2001
From: Jesse Barnes
Date: Tue, 23 Sep 2008 11:43:34 -0700
Subject: PCI: fix -Wakpm warnings in pci_pm_init debug output

Checkpatch would have complained about this but neither Bjorn nor myself
ran it prior to pushing.  Fixup the issues Andrew pointed out.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e1a17b85447b..09dc893c81db 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1270,8 +1270,8 @@ void pci_pm_init(struct pci_dev *dev)
 
 		if (dev->d1_support || dev->d2_support)
 			dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
-				   dev->d1_support ? " D1": "",
-				   dev->d2_support ? " D2": "");
+				   dev->d1_support ? " D1" : "",
+				   dev->d2_support ? " D2" : "");
 	}
 
 	pmc &= PCI_PM_CAP_PME_MASK;
-- 
cgit v1.2.3-55-g7522


From 50cbfa511a21cac1909b6b4c955fa39d1da81457 Mon Sep 17 00:00:00 2001
From: Roland Dreier
Date: Mon, 22 Sep 2008 14:55:24 -0700
Subject: PCI: fix MSI-HOWTO.txt info about MSI-X MMIO space

The current MSI-HOWTO.txt says that device drivers should not request the
memory space that contains MSI-X tables.  This is because the original
MSI-X implementation did a request_mem_region() on this space, but that
code was removed long ago (in the pre-git era, in fact).  Years after the
code was changed, we might as well clean up the documention to avoid a
confusing mention of requesting regions: drivers using MSI-X can just use
pci_request_regions() just like any other driver, and so there's no need
for MSI-HOWTO.txt to talk about this at all.

Signed-off-by: Roland Dreier <roland@digitalvampire.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/MSI-HOWTO.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c1541..256defd7e174 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
 MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
 table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA.  A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA.  A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
 
 5.3.2 API pci_enable_msix
 
-- 
cgit v1.2.3-55-g7522


From 11d587429e9cbb40ac20d7ed8126c66da0d7aba5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Fri, 5 Sep 2008 21:45:36 -0700
Subject: PCI: fix sparse warning in pci_remove_behind_bridge

Get rid of the second definition of dev which hides the earlier one in
the argument list and causes a warning from sparse.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/remove.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index f94f6d5ae297..042e08924421 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -115,13 +115,9 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
 {
 	struct list_head *l, *n;
 
-	if (dev->subordinate) {
-		list_for_each_safe(l, n, &dev->subordinate->devices) {
-			struct pci_dev *dev = pci_dev_b(l);
-
-			pci_remove_bus_device(dev);
-		}
-	}
+	if (dev->subordinate)
+		list_for_each_safe(l, n, &dev->subordinate->devices)
+			pci_remove_bus_device(pci_dev_b(l));
 }
 
 static void pci_stop_bus_devices(struct pci_bus *bus)
-- 
cgit v1.2.3-55-g7522


From c8761fe80ed052634153438405c9048611ae7ae1 Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 22 Sep 2008 14:26:05 +0800
Subject: PCI: fix hotplug get_##name return value problem

Currently, get_##name in pci_hotplug_core.c will return 0 if module
unload wins the race between unload & reading the hotplug file.  Fix
that case to return -ENODEV like it should.

Reviewed-by: Alex Chiang <achiang@hp.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pci_hotplug_core.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 5f85b1b120e3..27d2b6fe5d53 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,13 +102,13 @@ static int get_##name (struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (try_module_get(ops->owner)) {				\
-		if (ops->get_##name)					\
-			retval = ops->get_##name(slot, value);		\
-		else							\
-			*value = slot->info->name;			\
-		module_put(ops->owner);					\
-	}								\
+	if (try_module_get(ops->owner))					\
+		return -ENODEV;						\
+	if (ops->get_##name)						\
+		retval = ops->get_##name(slot, value);			\
+	else								\
+		*value = slot->info->name;				\
+	module_put(ops->owner);						\
 	return retval;							\
 }
 
-- 
cgit v1.2.3-55-g7522


From c9bbb4abb658daf6cc6f92fb4751a7796a5aab75 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 24 Sep 2008 19:04:33 -0700
Subject: PCI: use %pF instead of print_fn_descriptor_symbol() in quirks.c

Use %pF instead of print_fn_descriptor_symbol() in quirks.c to get the name of
the hook we're calling.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 246918fe9482..9575fc8f87b3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1935,8 +1935,7 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_f
 		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
  		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
 #ifdef DEBUG
-			dev_dbg(&dev->dev, "calling ");
-			print_fn_descriptor_symbol("%s\n", f->hook);
+			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
 #endif
 			f->hook(dev);
 		}
-- 
cgit v1.2.3-55-g7522


From 5d9bc1fa47f0c1561f1d7c0bdff5e24860852b42 Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi
Date: Mon, 13 Oct 2008 09:59:12 -0700
Subject: PCI hotplug: rpaphp: make debug var unique

Change debug variable name to one more unique to this driver.

Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/rpaphp.h      | 4 ++--
 drivers/pci/hotplug/rpaphp_core.c | 4 ++--
 drivers/pci/hotplug/rpaphp_pci.c  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7d5921b1ee78..419919a87b0f 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,10 +46,10 @@
 #define PRESENT         1	/* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int debug;
+extern int rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
-		if (debug)					\
+		if (rpaphp_debug)					\
 			printk(KERN_DEBUG "%s: " format,	\
 				MY_NAME , ## arg); 		\
 	} while (0)
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1f84f402acdb..95d02a08fdc7 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
 				/* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int debug;
+int rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
@@ -50,7 +50,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(debug, bool, 0644);
+module_param_named(debug, rpaphp_debug, bool, 0644);
 
 /**
  * set_attention_status - set attention LED
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 5acfd4f3d4cb..513e1e282391 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -123,7 +123,7 @@ int rpaphp_enable_slot(struct slot *slot)
 			slot->state = CONFIGURED;
 		}
 
-		if (debug) {
+		if (rpaphp_debug) {
 			struct pci_dev *dev;
 			dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
 			list_for_each_entry (dev, &bus->devices, bus_list)
-- 
cgit v1.2.3-55-g7522


From c322b28a04c084a467a862766f74c40c917a721c Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 13 Oct 2008 19:36:05 +0800
Subject: PCI: use same arg names in PCI_VDEVICE comment

This cleanup makes the argument names in PCI_VDEVICE comment consistent
with those used in its definition.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a4d0bebc311..008005674b60 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -455,8 +455,8 @@ struct pci_driver {
 
 /**
  * PCI_VDEVICE - macro used to describe a specific pci device in short form
- * @vend: the vendor name
- * @dev: the 16 bit PCI Device ID
+ * @vendor: the vendor name
+ * @device: the 16 bit PCI Device ID
  *
  * This macro is used to create a struct pci_device_id that matches a
  * specific PCI device.  The subvendor, and subdevice fields will be set
-- 
cgit v1.2.3-55-g7522


From 022edd86d7c864bc8fadc3c8ac4e6a464472ab05 Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 13 Oct 2008 19:24:28 +0800
Subject: PCI: use resource_size() everywhere.

This is a cleanup that replaces the resource calculation formula with
resource_size().

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/setup-bus.c | 4 ++--
 drivers/pci/setup-res.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 471a429d7a20..ea979f2bc6db 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -299,7 +299,7 @@ static void pbus_size_io(struct pci_bus *bus)
 
 			if (r->parent || !(r->flags & IORESOURCE_IO))
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 
 			if (r_size < 0x400)
 				/* Might be re-aligned for ISA */
@@ -350,7 +350,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
 			if (r->parent || (r->flags & mask) != type)
 				continue;
-			r_size = r->end - r->start + 1;
+			r_size = resource_size(r);
 			/* For bridges size != alignment */
 			align = resource_alignment(r);
 			order = __ffs(align) - 20;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index d4b5c690eaa7..2dbd96cce2d8 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -129,7 +129,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	resource_size_t size, min, align;
 	int ret;
 
-	size = res->end - res->start + 1;
+	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
 	align = resource_alignment(res);
-- 
cgit v1.2.3-55-g7522


From 557848c3c03ad1d1e66cb3b5b06698e3a9ebc33c Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 13 Oct 2008 19:18:07 +0800
Subject: PCI: replace cfg space size (256/4096) by macros.

This is a cleanup that changes all PCI configuration space size
representations to the macros (PCI_CFG_SPACE_SIZE and
PCI_CFG_SPACE_EXP_SIZE). And the macros are also moved from
drivers/pci/probe.c to drivers/pci/pci.h.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-sysfs.c | 10 +++++-----
 drivers/pci/pci.c       | 11 +++++++----
 drivers/pci/pci.h       |  7 +++++++
 drivers/pci/probe.c     |  5 ++---
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 77baff022f71..00a9947cb7cc 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -715,7 +715,7 @@ static struct bin_attribute pci_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 256,
+	.size = PCI_CFG_SPACE_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -725,7 +725,7 @@ static struct bin_attribute pcie_config_attr = {
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	.size = 4096,
+	.size = PCI_CFG_SPACE_EXP_SIZE,
 	.read = pci_read_config,
 	.write = pci_write_config,
 };
@@ -743,7 +743,7 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 	if (!sysfs_initialized)
 		return -EACCES;
 
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -814,7 +814,7 @@ err_vpd:
 		kfree(pdev->vpd->attr);
 	}
 err_config_file:
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -839,7 +839,7 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
 		kfree(pdev->vpd->attr);
 	}
-	if (pdev->cfg_size < 4096)
+	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 09dc893c81db..553ca6657955 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -213,10 +213,13 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
 int pci_find_ext_capability(struct pci_dev *dev, int cap)
 {
 	u32 header;
-	int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
-	int pos = 0x100;
+	int ttl;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (dev->cfg_size <= 256)
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
 		return 0;
 
 	if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -234,7 +237,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
 			return pos;
 
 		pos = PCI_EXT_CAP_NEXT(header);
-		if (pos < 0x100)
+		if (pos < PCI_CFG_SPACE_SIZE)
 			break;
 
 		if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4723b12fb39a..601abdc8dd9f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,3 +1,9 @@
+#ifndef DRIVERS_PCI_H
+#define DRIVERS_PCI_H
+
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
+
 /* Functions internal to the PCI core code */
 
 extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
@@ -145,3 +151,4 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7aa71636dd3c..f6754e87f046 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -14,8 +14,6 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
-#define PCI_CFG_SPACE_SIZE	256
-#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -887,8 +885,9 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
 	u32 status;
+	int pos = PCI_CFG_SPACE_SIZE;
 
-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
 		goto fail;
 	if (status == 0xffffffff)
 		goto fail;
-- 
cgit v1.2.3-55-g7522


From e354597cce8d219d135d65e585dc4f30323486b9 Mon Sep 17 00:00:00 2001
From: Peter Chubb
Date: Mon, 13 Oct 2008 11:49:04 +1100
Subject: PCI: fix 64-vbit prefetchable memory resource BARs

Since patch 6ac665c63dcac8fcec534a1d224ecbb8b867ad59 my infiniband
controller hasn't worked.  This is because it has 64-bit prefetchable
memory, which was mistakenly being  taken to be 32-bit memory.  The
resource flags in this case are PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH.

This patch checks only for the PCI_BASE_ADDRESS_MEM_TYPE_64 bit; thus
whether the region is prefetchable or not is ignored.  This fixes my
Infiniband.

Reviewed-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: Peter Chubb <peterc@gelato.unsw.edu.au>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f6754e87f046..49599ac49bda 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -217,7 +217,7 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
 
 	res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
 
-	if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
+	if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
 		return pci_bar_mem64;
 	return pci_bar_mem32;
 }
-- 
cgit v1.2.3-55-g7522


From 280c73d3691fb182fa55b0160737c2c0feb79471 Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 13 Oct 2008 20:01:00 +0800
Subject: PCI: centralize the capabilities code in pci-sysfs.c

This patch centralizes functions used to add and remove sysfs entries
for various capabilities. With this cleanup, the code is more readable
and easier for adding new capability related functions.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-sysfs.c | 138 +++++++++++++++++++++++++++++-------------------
 1 file changed, 83 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 00a9947cb7cc..2cad6da2e4aa 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -735,10 +735,41 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev)
 	return 0;
 }
 
+static int pci_create_capabilities_sysfs(struct pci_dev *dev)
+{
+	int retval;
+	struct bin_attribute *attr;
+
+	/* If the device has VPD, try to expose it in sysfs. */
+	if (dev->vpd) {
+		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
+		if (!attr)
+			return -ENOMEM;
+
+		attr->size = dev->vpd->len;
+		attr->attr.name = "vpd";
+		attr->attr.mode = S_IRUSR | S_IWUSR;
+		attr->read = pci_read_vpd;
+		attr->write = pci_write_vpd;
+		retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
+		if (retval) {
+			kfree(dev->vpd->attr);
+			return retval;
+		}
+		dev->vpd->attr = attr;
+	}
+
+	/* Active State Power Management */
+	pcie_aspm_create_sysfs_dev_files(dev);
+
+	return 0;
+}
+
 int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
-	struct bin_attribute *attr = NULL;
 	int retval;
+	int rom_size = 0;
+	struct bin_attribute *attr;
 
 	if (!sysfs_initialized)
 		return -EACCES;
@@ -750,69 +781,55 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 	if (retval)
 		goto err;
 
-	/* If the device has VPD, try to expose it in sysfs. */
-	if (pdev->vpd) {
-		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->vpd->attr = attr;
-			attr->size = pdev->vpd->len;
-			attr->attr.name = "vpd";
-			attr->attr.mode = S_IRUSR | S_IWUSR;
-			attr->read = pci_read_vpd;
-			attr->write = pci_write_vpd;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_vpd;
-		} else {
-			retval = -ENOMEM;
-			goto err_config_file;
-		}
-	}
-
 	retval = pci_create_resource_files(pdev);
 	if (retval)
-		goto err_vpd_file;
+		goto err_config_file;
+
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
 
 	/* If the device has a ROM, try to expose it in sysfs. */
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE) ||
-	    (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)) {
+	if (rom_size) {
 		attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-		if (attr) {
-			pdev->rom_attr = attr;
-			attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
-			attr->attr.name = "rom";
-			attr->attr.mode = S_IRUSR;
-			attr->read = pci_read_rom;
-			attr->write = pci_write_rom;
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-			if (retval)
-				goto err_rom;
-		} else {
+		if (!attr) {
 			retval = -ENOMEM;
 			goto err_resource_files;
 		}
+		attr->size = rom_size;
+		attr->attr.name = "rom";
+		attr->attr.mode = S_IRUSR;
+		attr->read = pci_read_rom;
+		attr->write = pci_write_rom;
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
+		if (retval) {
+			kfree(attr);
+			goto err_resource_files;
+		}
+		pdev->rom_attr = attr;
 	}
+
 	/* add platform-specific attributes */
-	if (pcibios_add_platform_entries(pdev))
+	retval = pcibios_add_platform_entries(pdev);
+	if (retval)
 		goto err_rom_file;
 
-	pcie_aspm_create_sysfs_dev_files(pdev);
+	/* add sysfs entries for various capabilities */
+	retval = pci_create_capabilities_sysfs(pdev);
+	if (retval)
+		goto err_rom_file;
 
 	return 0;
 
 err_rom_file:
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+	if (rom_size) {
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-err_rom:
-	kfree(pdev->rom_attr);
+		kfree(pdev->rom_attr);
+		pdev->rom_attr = NULL;
+	}
 err_resource_files:
 	pci_remove_resource_files(pdev);
-err_vpd_file:
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-err_vpd:
-		kfree(pdev->vpd->attr);
-	}
 err_config_file:
 	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
@@ -822,6 +839,16 @@ err:
 	return retval;
 }
 
+static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
+{
+	if (dev->vpd && dev->vpd->attr) {
+		sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
+		kfree(dev->vpd->attr);
+	}
+
+	pcie_aspm_remove_sysfs_dev_files(dev);
+}
+
 /**
  * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
  * @pdev: device whose entries we should free
@@ -830,15 +857,13 @@ err:
  */
 void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 {
+	int rom_size = 0;
+
 	if (!sysfs_initialized)
 		return;
 
-	pcie_aspm_remove_sysfs_dev_files(pdev);
+	pci_remove_capabilities_sysfs(pdev);
 
-	if (pdev->vpd) {
-		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-		kfree(pdev->vpd->attr);
-	}
 	if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
 		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
@@ -846,11 +871,14 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 
 	pci_remove_resource_files(pdev);
 
-	if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
-		if (pdev->rom_attr) {
-			sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-			kfree(pdev->rom_attr);
-		}
+	if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+		rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		rom_size = 0x20000;
+
+	if (rom_size && pdev->rom_attr) {
+		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
+		kfree(pdev->rom_attr);
 	}
 }
 
-- 
cgit v1.2.3-55-g7522


From 201de56eb22f1ff3f36804bc70cbff220b50f067 Mon Sep 17 00:00:00 2001
From: Zhao, Yu
Date: Mon, 13 Oct 2008 19:49:55 +0800
Subject: PCI: centralize the capabilities code in probe.c

This patch centralizes the initialization and release functions of
various PCI capabilities in probe.c, which makes the introduction
of new capability support functions cleaner in the future.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 49599ac49bda..8c158b9abd41 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -842,6 +842,11 @@ static int pci_setup_device(struct pci_dev * dev)
 	return 0;
 }
 
+static void pci_release_capabilities(struct pci_dev *dev)
+{
+	pci_vpd_release(dev);
+}
+
 /**
  * pci_release_dev - free a pci device structure when all users of it are finished.
  * @dev: device that's been disconnected
@@ -854,7 +859,7 @@ static void pci_release_dev(struct device *dev)
 	struct pci_dev *pci_dev;
 
 	pci_dev = to_pci_dev(dev);
-	pci_vpd_release(pci_dev);
+	pci_release_capabilities(pci_dev);
 	kfree(pci_dev);
 }
 
@@ -935,8 +940,6 @@ struct pci_dev *alloc_pci_dev(void)
 
 	INIT_LIST_HEAD(&dev->bus_list);
 
-	pci_msi_init_pci_dev(dev);
-
 	return dev;
 }
 EXPORT_SYMBOL(alloc_pci_dev);
@@ -1009,11 +1012,21 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 		return NULL;
 	}
 
-	pci_vpd_pci22_init(dev);
-
 	return dev;
 }
 
+static void pci_init_capabilities(struct pci_dev *dev)
+{
+	/* MSI/MSI-X list */
+	pci_msi_init_pci_dev(dev);
+
+	/* Power Management */
+	pci_pm_init(dev);
+
+	/* Vital Product Data */
+	pci_vpd_pci22_init(dev);
+}
+
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 {
 	device_initialize(&dev->dev);
@@ -1030,8 +1043,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	/* Fix up broken headers */
 	pci_fixup_device(pci_fixup_header, dev);
 
-	/* Initialize power management of the device */
-	pci_pm_init(dev);
+	/* Initialize various capabilities */
+	pci_init_capabilities(dev);
 
 	/*
 	 * Add the device to our list of discovered devices
-- 
cgit v1.2.3-55-g7522


From 58c3a727cb73b75a9104d295f096cca12959a5a5 Mon Sep 17 00:00:00 2001
From: Yu Zhao
Date: Tue, 14 Oct 2008 14:02:53 +0800
Subject: PCI: support PCIe ARI capability

This patch adds support for PCI Express Alternative Routing-ID
Interpretation (ARI) capability.

The ARI capability extends the Function Number field of the PCI Express
Endpoint by reusing the Device Number which is otherwise hardwired to 0.
With ARI, an Endpoint can have up to 256 functions.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c        | 32 ++++++++++++++++++++++++++++++++
 drivers/pci/pci.h        | 12 ++++++++++++
 drivers/pci/probe.c      |  3 +++
 include/linux/pci.h      |  1 +
 include/linux/pci_regs.h | 14 ++++++++++++++
 5 files changed, 62 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 553ca6657955..4db261e13e69 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1299,6 +1299,38 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 }
 
+/**
+ * pci_enable_ari - enable ARI forwarding if hardware support it
+ * @dev: the PCI device
+ */
+void pci_enable_ari(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap;
+	u16 ctrl;
+
+	if (!dev->is_pcie)
+		return;
+
+	if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
+	    dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
+		return;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return;
+
+	pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_ARI))
+		return;
+
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+	ctrl |= PCI_EXP_DEVCTL2_ARI;
+	pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+	dev->ari_enabled = 1;
+}
+
 int
 pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 601abdc8dd9f..39684c1415c5 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -151,4 +151,16 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+extern void pci_enable_ari(struct pci_dev *dev);
+/**
+ * pci_ari_enabled - query ARI forwarding status
+ * @dev: the PCI device
+ *
+ * Returns 1 if ARI forwarding is enabled, or 0 if not enabled;
+ */
+static inline int pci_ari_enabled(struct pci_dev *dev)
+{
+	return dev->ari_enabled;
+}
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8c158b9abd41..3141e8deeac4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1025,6 +1025,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Vital Product Data */
 	pci_vpd_pci22_init(dev);
+
+	/* Alternative Routing-ID Forwarding */
+	pci_enable_ari(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 008005674b60..7e9a1f0715e6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -214,6 +214,7 @@ struct pci_dev {
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
+	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
 	unsigned int	is_pcie:1;
 	pci_dev_flags_t dev_flags;
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 450684f7eaac..eb6686b88f9a 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -419,6 +419,10 @@
 #define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP		30	/* Root Capabilities */
 #define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+#define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
@@ -429,6 +433,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_ARI	14
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -536,5 +541,14 @@
 #define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
 #define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
 
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP		0x04	/* ARI Capability Register */
+#define  PCI_ARI_CAP_MFVC	0x0001	/* MFVC Function Groups Capability */
+#define  PCI_ARI_CAP_ACS	0x0002	/* ACS Function Groups Capability */
+#define  PCI_ARI_CAP_NFN(x)	(((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL		0x06	/* ARI Control Register */
+#define  PCI_ARI_CTRL_MFVC	0x0001	/* MFVC Function Groups Enable */
+#define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
+#define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
 #endif /* LINUX_PCI_REGS_H */
-- 
cgit v1.2.3-55-g7522


From f393d9b130423a7a47c751b26df07ceaa5dc76a9 Mon Sep 17 00:00:00 2001
From: Vincent Legoll
Date: Sun, 12 Oct 2008 12:26:12 +0200
Subject: PCI: probing debug message uniformization

This patch uniformizes PCI probing debug boot messages with dev_printk()
intead of manual printk()

It changes adress range output from [%llx, %llx] to [%#llx-%#llx], like
in pci_request_region().

For example, it goes from the mixed-style:

PCI: 0000:00:1b.0 reg 10 64bit mmio: [f4280000, f4283fff]
pci 0000:00:1b.0: PME# supported from D0 D3hot D3cold

to uniform:

pci 0000:00:1b.0: reg 10 64bit mmio: [0xf4280000-0xf4283fff]
pci 0000:00:1b.0: PME# supported from D0 D3hot D3cold

This patch has been runtime tested, boot log messages diffed, everything
looks OK.

Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Vincent Legoll <vincent.legoll@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aspm.c |  6 +++---
 drivers/pci/probe.c     | 25 ++++++++++++-------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 851f5b83cdbc..8f63f4c6b85f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -528,9 +528,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 		pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
 			&reg32);
 		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
-			printk("Pre-1.1 PCIe device detected, "
-				"disable ASPM for %s. It can be enabled forcedly"
-				" with 'pcie_aspm=force'\n", pci_name(pdev));
+			dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
+				" on pre-1.1 PCIe device.  You can enable it"
+				" with 'pcie_aspm=force'\n");
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 3141e8deeac4..afb9f1c0bc28 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -302,8 +302,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 		} else {
 			res->start = l64;
 			res->end = l64 + sz64;
-			printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
-				pci_name(dev), pos, res);
+			dev_printk(KERN_DEBUG, &dev->dev,
+				"reg %x 64bit mmio: %pR\n", pos, res);
 		}
 	} else {
 		sz = pci_size(l, sz, mask);
@@ -313,10 +313,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
 		res->start = l;
 		res->end = l + sz;
-		printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
-		       pci_name(dev), pos,
-		       (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-		       res);
+
+		dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos,
+			(res->flags & IORESOURCE_IO) ? "io port" : "32bit mmio",
+			res);
 	}
 
  out:
@@ -387,8 +387,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 			res->start = base;
 		if (!res->end)
 			res->end = limit + 0xfff;
-		printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
-		       pci_name(dev), res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res);
 	}
 
 	res = child->resource[1];
@@ -400,8 +399,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
-		       pci_name(dev), res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n",
+			res);
 	}
 
 	res = child->resource[2];
@@ -437,9 +436,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
 		res->start = base;
 		res->end = limit + 0xfffff;
-		printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
-		       pci_name(dev),
-		       (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
+		dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
+			(res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
+			res);
 	}
 }
 
-- 
cgit v1.2.3-55-g7522


From f19aeb1f3638b7bb4ca21eb361f004fac2bfe259 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt
Date: Fri, 3 Oct 2008 19:49:32 +1000
Subject: PCI: Add ability to mmap legacy_io on some platforms

This adds the ability to mmap legacy IO space to the legacy_io files
in sysfs on platforms that support it. This will allow to clean up
X to use this instead of /dev/mem for legacy IO accesses such as
those performed by Int10.

While at it I moved pci_create/remove_legacy_files() to pci-sysfs.c
where I think they belong, thus making more things statis in there
and cleaned up some spurrious prototypes in the ia64 pci.h file

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/include/asm/pci.h | 12 +-----
 arch/ia64/pci/pci.c         |  7 +++-
 drivers/pci/pci-sysfs.c     | 93 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/pci/pci.h           |  6 +++
 drivers/pci/probe.c         | 66 --------------------------------
 5 files changed, 102 insertions(+), 82 deletions(-)

diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736d..ce342fb74246 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 				enum pci_mmap_state mmap_state, int write_combine);
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
-				      struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
-				   struct bin_attribute *bin_attr,
-				   char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
-			       struct bin_attribute *attr,
-			       struct vm_area_struct *vma);
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
 
 #define pci_get_legacy_mem platform_pci_get_legacy_mem
 #define pci_legacy_read platform_pci_legacy_read
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a8625..211fcfd115f9 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 	pgprot_t prot;
 	char *addr;
 
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
 	/*
 	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
 	 * for more details.
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 2cad6da2e4aa..110022d78689 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -423,7 +423,7 @@ pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_read).
  */
-ssize_t
+static ssize_t
 pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
@@ -448,7 +448,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_write).
  */
-ssize_t
+static ssize_t
 pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 		    char *buf, loff_t off, size_t count)
 {
@@ -468,11 +468,11 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
  *
- * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap
+ * Uses an arch specific callback, pci_mmap_legacy_mem_page_range, to mmap
  * legacy memory space (first meg of bus space) into application virtual
  * memory space.
  */
-int
+static int
 pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
@@ -480,7 +480,90 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                                                       struct device,
 						      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma);
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+}
+
+/**
+ * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_io_page_range, to mmap
+ * legacy IO space (first meg of bus space) into application virtual
+ * memory space. Returns -ENOSYS if the operation isn't supported
+ */
+static int
+pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+		   struct vm_area_struct *vma)
+{
+        struct pci_bus *bus = to_pci_bus(container_of(kobj,
+                                                      struct device,
+						      kobj));
+
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+}
+
+/**
+ * pci_create_legacy_files - create legacy I/O port and memory files
+ * @b: bus to create files under
+ *
+ * Some platforms allow access to legacy I/O port and ISA memory space on
+ * a per-bus basis.  This routine creates the files and ties them into
+ * their associated read, write and mmap files from pci-sysfs.c
+ *
+ * On error unwind, but don't propogate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
+ */
+void pci_create_legacy_files(struct pci_bus *b)
+{
+	int error;
+
+	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
+			       GFP_ATOMIC);
+	if (!b->legacy_io)
+		goto kzalloc_err;
+
+	b->legacy_io->attr.name = "legacy_io";
+	b->legacy_io->size = 0xffff;
+	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_io->read = pci_read_legacy_io;
+	b->legacy_io->write = pci_write_legacy_io;
+	b->legacy_io->mmap = pci_mmap_legacy_io;
+	error = device_create_bin_file(&b->dev, b->legacy_io);
+	if (error)
+		goto legacy_io_err;
+
+	/* Allocated above after the legacy_io struct */
+	b->legacy_mem = b->legacy_io + 1;
+	b->legacy_mem->attr.name = "legacy_mem";
+	b->legacy_mem->size = 1024*1024;
+	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_mem->mmap = pci_mmap_legacy_mem;
+	error = device_create_bin_file(&b->dev, b->legacy_mem);
+	if (error)
+		goto legacy_mem_err;
+
+	return;
+
+legacy_mem_err:
+	device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+	kfree(b->legacy_io);
+	b->legacy_io = NULL;
+kzalloc_err:
+	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
+	       "and ISA memory resources to sysfs\n");
+	return;
+}
+
+void pci_remove_legacy_files(struct pci_bus *b)
+{
+	if (b->legacy_io) {
+		device_remove_bin_file(&b->dev, b->legacy_io);
+		device_remove_bin_file(&b->dev, b->legacy_mem);
+		kfree(b->legacy_io); /* both are allocated here */
+	}
 }
 #endif /* HAVE_PCI_LEGACY */
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 39684c1415c5..b205ab866a1d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -82,7 +82,13 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
 /* Functions for PCI Hotplug drivers to use */
 extern unsigned int pci_do_scan_bus(struct pci_bus *bus);
 
+#ifdef HAVE_PCI_LEGACY
+extern void pci_create_legacy_files(struct pci_bus *bus);
 extern void pci_remove_legacy_files(struct pci_bus *bus);
+#else
+static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
+static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
+#endif
 
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index afb9f1c0bc28..aaaf0a1fed22 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -42,72 +42,6 @@ int no_pci_devices(void)
 }
 EXPORT_SYMBOL(no_pci_devices);
 
-#ifdef HAVE_PCI_LEGACY
-/**
- * pci_create_legacy_files - create legacy I/O port and memory files
- * @b: bus to create files under
- *
- * Some platforms allow access to legacy I/O port and ISA memory space on
- * a per-bus basis.  This routine creates the files and ties them into
- * their associated read, write and mmap files from pci-sysfs.c
- *
- * On error unwind, but don't propogate the error to the caller
- * as it is ok to set up the PCI bus without these files.
- */
-static void pci_create_legacy_files(struct pci_bus *b)
-{
-	int error;
-
-	b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
-			       GFP_ATOMIC);
-	if (!b->legacy_io)
-		goto kzalloc_err;
-
-	b->legacy_io->attr.name = "legacy_io";
-	b->legacy_io->size = 0xffff;
-	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_io->read = pci_read_legacy_io;
-	b->legacy_io->write = pci_write_legacy_io;
-	error = device_create_bin_file(&b->dev, b->legacy_io);
-	if (error)
-		goto legacy_io_err;
-
-	/* Allocated above after the legacy_io struct */
-	b->legacy_mem = b->legacy_io + 1;
-	b->legacy_mem->attr.name = "legacy_mem";
-	b->legacy_mem->size = 1024*1024;
-	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
-	b->legacy_mem->mmap = pci_mmap_legacy_mem;
-	error = device_create_bin_file(&b->dev, b->legacy_mem);
-	if (error)
-		goto legacy_mem_err;
-
-	return;
-
-legacy_mem_err:
-	device_remove_bin_file(&b->dev, b->legacy_io);
-legacy_io_err:
-	kfree(b->legacy_io);
-	b->legacy_io = NULL;
-kzalloc_err:
-	printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
-	       "and ISA memory resources to sysfs\n");
-	return;
-}
-
-void pci_remove_legacy_files(struct pci_bus *b)
-{
-	if (b->legacy_io) {
-		device_remove_bin_file(&b->dev, b->legacy_io);
-		device_remove_bin_file(&b->dev, b->legacy_mem);
-		kfree(b->legacy_io); /* both are allocated here */
-	}
-}
-#else /* !HAVE_PCI_LEGACY */
-static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
-void pci_remove_legacy_files(struct pci_bus *bus) { return; }
-#endif /* HAVE_PCI_LEGACY */
-
 /*
  * PCI Bus Class Devices
  */
-- 
cgit v1.2.3-55-g7522


From e9f82cb75096ae30658a72d473bf170bf4d3bb2e Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt
Date: Tue, 14 Oct 2008 11:55:31 +1100
Subject: powerpc/PCI: Add legacy PCI access via sysfs

This patch adds support for legacy_io and legacy_mem files in
bus class directories in sysfs for powerpc

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/include/asm/pci-bridge.h |   7 ++
 arch/powerpc/include/asm/pci.h        |  11 +++
 arch/powerpc/kernel/pci-common.c      | 136 +++++++++++++++++++++++++++++++++-
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ae2ea803a0f2..9047af7baa69 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -74,6 +74,13 @@ struct pci_controller {
 	unsigned long pci_io_size;
 #endif
 
+	/* Some machines have a special region to forward the ISA
+	 * "memory" cycles such as VGA memory regions. Left to 0
+	 * if unsupported
+	 */
+	resource_size_t	isa_mem_phys;
+	resource_size_t	isa_mem_size;
+
 	struct pci_ops *ops;
 	unsigned int __iomem *cfg_addr;
 	void __iomem *cfg_data;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0e52c7828ea4..39d547fde956 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
 /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
 #define HAVE_PCI_MMAP	1
 
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+			   size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+			   size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+
+#define HAVE_PCI_LEGACY	1
+
 #if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
 /*
  * For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
 extern void pcibios_do_bus_setup(struct pci_bus *bus);
 extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
 
+
 #endif	/* __KERNEL__ */
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 01ce8c38bae6..3815d84a1ef4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
 		pci_dev_put(pdev);
 	}
 
-	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+	DBG("non-PCI map for %llx, prot: %lx\n",
+	    (unsigned long long)offset, prot);
 
 	return __pgprot(prot);
 }
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	return ret;
 }
 
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	switch(size) {
+	case 1:
+		*((u8 *)val) = in_8(addr);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		*((u16 *)val) = in_le16(addr);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		*((u32 *)val) = in_le32(addr);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check
+	 * the ranges of the PHB though, not the bus itself as the rules
+	 * for forwarding legacy cycles down bridges are not our problem
+	 * here. So if the host bridge supports it, we do it.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO))
+		return -ENXIO;
+	if (offset < rp->start || (offset + size) > rp->end)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	/* WARNING: The generic code is idiotic. It gets passed a pointer
+	 * to what can be a 1, 2 or 4 byte quantity and always reads that
+	 * as a u32, which means that we have to correct the location of
+	 * the data read within those 32 bits for size 1 and 2
+	 */
+	switch(size) {
+	case 1:
+		out_8(addr, val >> 24);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		out_le16(addr, val >> 16);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		out_le32(addr, val);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+			       struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t offset =
+		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+	resource_size_t size = vma->vm_end - vma->vm_start;
+	struct resource *rp;
+
+	pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+		 pci_domain_nr(bus), bus->number,
+		 mmap_state == pci_mmap_mem ? "MEM" : "IO",
+		 (unsigned long long)offset,
+		 (unsigned long long)(offset + size - 1));
+
+	if (mmap_state == pci_mmap_mem) {
+		if ((offset + size) > hose->isa_mem_size)
+			return -ENXIO;
+		offset += hose->isa_mem_phys;
+	} else {
+		unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		unsigned long roffset = offset + io_offset;
+		rp = &hose->io_resource;
+		if (!(rp->flags & IORESOURCE_IO))
+			return -ENXIO;
+		if (roffset < rp->start || (roffset + size) > rp->end)
+			return -ENXIO;
+		offset += hose->io_base_phys;
+	}
+	pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 		cpu_addr = of_translate_address(dev, ranges + 3);
 		size = of_read_number(ranges + pna + 3, 2);
 		ranges += np;
+
+		/* If we failed translation or got a zero-sized region
+		 * (some FW try to feed us with non sensical zero sized regions
+		 * such as power3 which look like some kind of attempt at exposing
+		 * the VGA memory hole)
+		 */
 		if (cpu_addr == OF_BAD_ADDR || size == 0)
 			continue;
 
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 				isa_hole = memno;
 				if (primary || isa_mem_base == 0)
 					isa_mem_base = cpu_addr;
+				hose->isa_mem_phys = cpu_addr;
+				hose->isa_mem_size = size;
 			}
 
 			/* We get the PCI/Mem offset from the first range or
-- 
cgit v1.2.3-55-g7522


From aa42d7c6138afdc54f74e971456a0fbfec16b77b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Sun, 28 Sep 2008 16:36:11 -0700
Subject: PCI: introduce an pci_ioremap(pdev, barnr) function

A common thing in many PCI drivers is to ioremap() an entire bar.  This
is a slightly fragile thing right now, needing both an address and a
size, and many driver writers do.. various things there.

This patch introduces an pci_ioremap() function taking just a PCI device
struct and the bar number as arguments, and figures this all out itself,
in one place.  In addition, we can add various sanity checks to this
function (the patch already checks to make sure that the bar in question
really is a MEM bar; few to no drivers do that sort of thing).

Hopefully with this type of API we get less chance of mistakes in
drivers with ioremap() operations.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7e9a1f0715e6..46ad282ffe4d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1119,5 +1119,18 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	/*
+	 * Make sure the BAR is actually a memory resource, not an IO resource
+	 */
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+		WARN_ON(1);
+		return NULL;
+	}
+	return ioremap_nocache(pci_resource_start(pdev, bar),
+				     pci_resource_len(pdev, bar));
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3-55-g7522


From e5665a45fa28d0114f61b5d534a3b2678592219d Mon Sep 17 00:00:00 2001
From: Chuck Ebbert
Date: Wed, 24 Sep 2008 20:40:34 -0400
Subject: PCI: document the pcie_aspm kernel parameter

It can be handy so make sure people know about it.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 08fbc8372ba4..53ba7c7d82b3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1678,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
 
+	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
+			Management.
+		off	Disable ASPM.
+		force	Enable ASPM even on devices that claim not to support it.
+			WARNING: Forcing ASPM on may cause system lockups.
+
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
-- 
cgit v1.2.3-55-g7522


From bd1d9855be3ab8a5c2b31053d464b7fe63e6963b Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige
Date: Mon, 29 Sep 2008 17:37:05 +0900
Subject: PCI hotplug: fix get_##name return value problem

The commit 356a9d6f3dd283f83861adf1ac909879f0e66411 (PCI: fix hotplug
get_##name return value problem) doesn't seem to be merged properly.
Because of this, PCI hotplug no longer works (Read/Write PCI hotplug
files always returns -ENODEV).

This patch fixes wrong check of try_module_get() return value check in
get_##name().

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pci_hotplug_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 27d2b6fe5d53..2e6c4474644e 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,7 +102,7 @@ static int get_##name (struct hotplug_slot *slot, type *value)		\
 {									\
 	struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (try_module_get(ops->owner))					\
+	if (!try_module_get(ops->owner))				\
 		return -ENODEV;						\
 	if (ops->get_##name)						\
 		retval = ops->get_##name(slot, value);			\
-- 
cgit v1.2.3-55-g7522


From 1543c90c39360df333a21bfbbdfe812ae23b8167 Mon Sep 17 00:00:00 2001
From: Jesse Barnes
Date: Sat, 18 Oct 2008 17:19:38 -0700
Subject: PCI: remove #ifdef DEBUG around dev_dbg call

No longer needed since we don't use the function symbol stuff anymore.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 9575fc8f87b3..bbf66ea8fd87 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1934,9 +1934,7 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_f
 	while (f < end) {
 		if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
  		    (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
 			dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
 			f->hook(dev);
 		}
 		f++;
-- 
cgit v1.2.3-55-g7522


From 0927678f55c9a50c296f7e6dae85e87b8236e155 Mon Sep 17 00:00:00 2001
From: Jesse Barnes
Date: Sat, 18 Oct 2008 17:33:19 -0700
Subject: PCI: use pci_find_ext_capability everywhere

Remove some open coded (and buggy) versions of pci_find_ext_capability
in favor of the real routine in the PCI core.

Tested-by: Tomasz Czernecki <czernecki@gmail.com>
Acked-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aer/aerdrv.c      |  6 ++---
 drivers/pci/pcie/aer/aerdrv_core.c | 47 ++++++++------------------------------
 drivers/pci/pcie/portdrv_core.c    | 23 ++++---------------
 drivers/scsi/qla2xxx/qla_os.c      |  5 ++--
 include/linux/aer.h                |  4 ----
 5 files changed, 20 insertions(+), 65 deletions(-)

diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 77036f46acfe..e390707661dd 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -105,7 +105,7 @@ static irqreturn_t aer_irq(int irq, void *context)
 	unsigned long flags;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev->port);
+	pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
 	/*
 	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
 	 * and Root error producer/consumer index
@@ -252,7 +252,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	u32 status;
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
@@ -316,7 +316,7 @@ static void aer_error_resume(struct pci_dev *dev)
 	pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
 
 	/* Clean AER Root Error Status */
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
 	if (dev->error_state == pci_channel_io_normal)
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index ee5e7b5176d0..1ff21f6045d6 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -28,36 +28,6 @@
 static int forceload;
 module_param(forceload, bool, 0);
 
-#define PCI_CFG_SPACE_SIZE	(0x100)
-int pci_find_aer_capability(struct pci_dev *dev)
-{
-	int pos;
-	u32 reg32 = 0;
-
-	/* Check if it's a pci-express device */
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-
-	/* Check if it supports pci-express AER */
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		if (pci_read_config_dword(dev, pos, &reg32))
-			return 0;
-
-		/* some broken boards return ~0 */
-		if (reg32 == 0xffffffff)
-			return 0;
-
-		if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
-			break;
-
-		pos = reg32 >> 20;
-	}
-
-	return pos;
-}
-
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	u16 reg16 = 0;
@@ -67,6 +37,10 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	if (!pos)
 		return -EIO;
 
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return -EIO;
+
 	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
 	reg16 = reg16 |
 		PCI_EXP_DEVCTL_CERE |
@@ -102,7 +76,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status, mask;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -123,7 +97,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
@@ -502,7 +476,7 @@ static void handle_error_source(struct pcie_device * aerdev,
 		 * Correctable error does not need software intevention.
 		 * No need to go through error recovery process.
 		 */
-		pos = pci_find_aer_capability(dev);
+		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 		if (pos)
 			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
 					info.status);
@@ -542,7 +516,7 @@ void aer_enable_rootport(struct aer_rpc *rpc)
 	reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
 	pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
 
-	aer_pos = pci_find_aer_capability(pdev);
+	aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Clear error status */
 	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
 	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -579,7 +553,7 @@ static void disable_root_aer(struct aer_rpc *rpc)
 	u32 reg32;
 	int pos;
 
-	pos = pci_find_aer_capability(pdev);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
 	/* Disable Root's interrupt in response to error messages */
 	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
 
@@ -618,7 +592,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int pos;
 
-	pos = pci_find_aer_capability(dev);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
 	/* The device might not support AER */
 	if (!pos)
@@ -755,7 +729,6 @@ int aer_init(struct pcie_device *dev)
 	return AER_SUCCESS;
 }
 
-EXPORT_SYMBOL_GPL(pci_find_aer_capability);
 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 890f0d2b370a..2e091e014829 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -195,24 +195,11 @@ static int get_port_device_capability(struct pci_dev *dev)
 	/* PME Capable - root port capability */
 	if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
 		services |= PCIE_PORT_SERVICE_PME;
-	
-	pos = PCI_CFG_SPACE_SIZE;
-	while (pos) {
-		pci_read_config_dword(dev, pos, &reg32);
-		switch (reg32 & 0xffff) {
-		case PCI_EXT_CAP_ID_ERR:
-			services |= PCIE_PORT_SERVICE_AER;
-			pos = reg32 >> 20;
-			break;
-		case PCI_EXT_CAP_ID_VC:
-			services |= PCIE_PORT_SERVICE_VC;
-			pos = reg32 >> 20;
-			break;
-		default:
-			pos = 0;
-			break;
-		}
-	}
+
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
+		services |= PCIE_PORT_SERVICE_AER;
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
+		services |= PCIE_PORT_SERVICE_VC;
 
 	return services;
 }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2aed4721c0d0..21dd182ad512 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1566,9 +1566,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			goto probe_out;
 	}
 
-	if (pci_find_aer_capability(pdev))
-		if (pci_enable_pcie_error_reporting(pdev))
-			goto probe_out;
+	/* This may fail but that's ok */
+	pci_enable_pcie_error_reporting(pdev);
 
 	host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
 	if (host == NULL) {
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f2518141de88..a2383a72356a 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -18,10 +18,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
-static inline int pci_find_aer_capability(struct pci_dev *dev)
-{
-	return 0;
-}
 static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
-- 
cgit v1.2.3-55-g7522


From 270c66be9b4a6f2be53ef3aec5dc8e7b07782ec9 Mon Sep 17 00:00:00 2001
From: Yu Zhao
Date: Sun, 19 Oct 2008 20:35:20 +0800
Subject: PCI: fix AER capability check

The 'use pci_find_ext_capability everywhere' cleanup brought a new bug,
which makes the AER stop working.  Fix it by actually using find_ext_cap
instead of just find_cap.  Drop the unused config space size define while
we're at it.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/PCI/pcieaer-howto.txt | 11 +++--------
 drivers/pci/pcie/aer/aerdrv_core.c  |  4 ++--
 drivers/pci/pcie/portdrv.h          |  1 -
 include/linux/aer.h                 |  1 -
 4 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c82..ddeb14beacc8 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
 
 3.3 helper functions
 
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 pci_enable_pcie_error_reporting enables the device to send error
 messages to root port when an error is detected. Note that devices
 don't enable the error reporting by default, so device drivers need
 call this function to enable it.
 
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 pci_disable_pcie_error_reporting disables the device to send error
 messages to root port when an error is detected.
 
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
 error status register.
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 1ff21f6045d6..dfc63d01f20a 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -33,11 +33,11 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
 
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 3656e0349dd1..2529f3f2ea5a 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,7 +25,6 @@
 #define PCIE_CAPABILITIES_REG		0x2
 #define PCIE_SLOT_CAPABILITIES_REG	0x14
 #define PCIE_PORT_DEVICE_MAXSERVICES	4
-#define PCI_CFG_SPACE_SIZE		256
 
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 
diff --git a/include/linux/aer.h b/include/linux/aer.h
index a2383a72356a..f7df1eefc107 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -10,7 +10,6 @@
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
-- 
cgit v1.2.3-55-g7522


From 96499871f45b9126157b1a5c512d6e30f1635225 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 20 Oct 2008 19:45:43 +0200
Subject: PCI: fix pci_ioremap_bar() on s390

s390 doesn't have ioremap_*, so protect the definition of the new
pci_ioremap_bar function with CONFIG_HAS_IOMEM to avoid build breakage.

Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 46ad282ffe4d..085187be29c7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1119,6 +1119,7 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+#ifdef CONFIG_HAS_IOMEM
 static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
 	/*
@@ -1131,6 +1132,7 @@ static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
 	return ioremap_nocache(pci_resource_start(pdev, bar),
 				     pci_resource_len(pdev, bar));
 }
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3-55-g7522


From f4432c5caec5fa95ea7eefd00f8e6cee17e2e023 Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Mon, 20 Oct 2008 13:31:45 -0400
Subject: Update email addresses.

Update assorted email addresses and related info to point
to a single current, valid address.

additionally
- trivial CREDITS entry updates. (Not that this file means much any more)
- remove arjans dead redhat.com address from powernow driver

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS                                     | 12 ++++++------
 MAINTAINERS                                 |  2 +-
 arch/x86/kernel/cpu/cpufreq/longhaul.c      |  4 ++--
 arch/x86/kernel/cpu/cpufreq/powernow-k6.c   |  2 +-
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c   |  4 ++--
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c   |  2 +-
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c |  2 +-
 arch/x86/kernel/cpu/mcheck/k7.c             |  4 ++--
 arch/x86/kernel/cpu/mcheck/mce_32.c         |  2 +-
 arch/x86/kernel/cpu/mcheck/non-fatal.c      |  2 +-
 drivers/char/agp/ali-agp.c                  |  2 +-
 drivers/char/agp/amd64-agp.c                |  2 +-
 drivers/char/agp/ati-agp.c                  |  2 +-
 drivers/char/agp/backend.c                  |  2 +-
 drivers/char/agp/intel-agp.c                |  2 +-
 drivers/char/agp/nvidia-agp.c               |  2 +-
 drivers/char/agp/via-agp.c                  |  2 +-
 scripts/checkpatch.pl                       |  2 +-
 18 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/CREDITS b/CREDITS
index c62dcb3b7e26..2358846f06be 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintainence.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/MAINTAINERS b/MAINTAINERS
index 355c192d6997..5c3f79c26384 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1198,7 +1198,7 @@ S:	Maintained
 
 CPU FREQUENCY DRIVERS
 P:	Dave Jones
-M:	davej@codemonkey.org.uk
+M:	davej@redhat.com
 L:	cpufreq@vger.kernel.org
 W:	http://www.codemonkey.org.uk/projects/cpufreq/
 T:	git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d51..b0461856acfb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..c1ac5790c63e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b71..7c7d56b43136 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
  *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d8..008d23ba491b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
  *  Support : mark.langsdorf@amd.com
  *
  *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
  *  (C) 2004 Dominik Brodowski <linux@brodo.de>
  *  (C) 2004 Pavel Machek <pavel@suse.cz>
  *  Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61d..04d0376b64b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
 MODULE_LICENSE ("GPL");
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f66351..dd3af6e7b39a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
 /*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8cd..0ebf3fc6a610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e0..a74af128efc9 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
 /*
  * Non Fatal Machine Check Exception Reporting
  *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  *
  * This file contains routines to check for non-fatal MCEs every 15s
  *
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 31dcd9142d54..dc8d1a90971f 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -417,6 +417,6 @@ static void __exit agp_ali_cleanup(void)
 module_init(agp_ali_init);
 module_exit(agp_ali_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 2812ee2b165a..52f4361eb6e4 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -772,6 +772,6 @@ module_init(agp_amd64_init);
 module_exit(agp_amd64_cleanup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
 module_param(agp_try_unsupported, bool, 0);
 MODULE_LICENSE("GPL");
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index ae2791b926b9..f1537eece07f 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -561,6 +561,6 @@ static void __exit agp_ati_cleanup(void)
 module_init(agp_ati_init);
 module_exit(agp_ati_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 3a3cc03d401c..8c617ad7497f 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -349,7 +349,7 @@ static __init int agp_setup(char *s)
 __setup("agp=", agp_setup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION("AGP GART driver");
 MODULE_LICENSE("GPL and additional rights");
 MODULE_ALIAS_MISCDEV(AGPGART_MINOR);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1108665913e2..9cf6e9bb017e 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2390,5 +2390,5 @@ static void __exit agp_intel_cleanup(void)
 module_init(agp_intel_init);
 module_exit(agp_intel_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 5bbed3d79db9..16acee2de117 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -1,7 +1,7 @@
 /*
  * Nvidia AGPGART routines.
  * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
- * to work in 2.5 by Dave Jones <davej@codemonkey.org.uk>
+ * to work in 2.5 by Dave Jones <davej@redhat.com>
  */
 
 #include <linux/module.h>
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 9f4d49e1b59a..d3bd243867fc 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -595,4 +595,4 @@ module_init(agp_via_init);
 module_exit(agp_via_cleanup);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e30bac141b21..f88bb3e21cda 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# (c) 2001, Dave Jones. <davej@codemonkey.org.uk> (the file handling bit)
+# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
 # (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
 # Licensed under the terms of the GNU GPL License version 2
-- 
cgit v1.2.3-55-g7522


From f07767fd0f95c385108fa4c456a9cb216a424fec Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Mon, 20 Oct 2008 10:23:38 -0700
Subject: byteorder: remove direct includes of linux/byteorder/swab[b].h

A consolidated implementation will provide this generically through
asm/byteorder, remove direct includes to avoid breakage when the
changeover to the new implementation occurs.

This hunk was lost from commit 1d8cca44b6a244b7e378546d719041819049a0f9
("byteorder: provide swabb.h generically in asm/byteorder.h")

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 90b5b123f7a1..85cb90588a55 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -42,10 +42,10 @@
 #include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
-- 
cgit v1.2.3-55-g7522


From 1ae87786800b5e0411847974b211797b6ada63c4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 20 Oct 2008 14:14:25 -0700
Subject: Fix sprintf format warnings in drm_proc.c

Use "%zd" for size_t, and make sure to have a space between the numbers
instead of depending on the field width.

I don't like warnings in my default targeted build.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index d490db4c0de0..ae73b7f7249a 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -522,12 +522,12 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
 	struct drm_gem_object *obj = ptr;
 	struct drm_gem_name_info_data   *nid = data;
 
-	DRM_INFO("name %d size %d\n", obj->name, obj->size);
+	DRM_INFO("name %d size %zd\n", obj->name, obj->size);
 	if (nid->eof)
 		return 0;
 
 	nid->len += sprintf(&nid->buf[nid->len],
-			    "%6d%9d%8d%9d\n",
+			    "%6d %8zd %7d %8d\n",
 			    obj->name, obj->size,
 			    atomic_read(&obj->handlecount.refcount),
 			    atomic_read(&obj->refcount.refcount));
-- 
cgit v1.2.3-55-g7522


From 9b7530cc329eb036cfa589930c270e85031f554c Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 20 Oct 2008 14:16:43 -0700
Subject: i915: cleanup coding horrors in i915_gem_gtt_pwrite()

Yes, this will probably be switched over to a cleaner model anyway, but
in the meantime I don't want to see the 'unused variable' warnings that
come from the disgusting #ifdef code.  Make the special case be a nice
inlien function of its own, clean up the code, and make the warning go
away.

I wish people didn't write code that gets (valid) warnings from the
compiler, but I'll limit my fixes to code that I actually care about (in
this case just because I see the warning and it annoys me).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem.c | 59 +++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd1b422..49c5a1798ac4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,6 +171,36 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/*
+ * Try to write quickly with an atomic kmap. Return true on success.
+ *
+ * If this fails (which includes a partial write), we'll redo the whole
+ * thing with the slow version.
+ *
+ * This is a workaround for the low performance of iounmap (approximate
+ * 10% cpu cost on normal 3D workloads).  kmap_atomic on HIGHMEM kernels
+ * happens to let us map card memory without taking IPIs.  When the vmap
+ * rework lands we should be able to dump this hack.
+ */
+static inline int fast_user_write(unsigned long pfn, char __user *user_data, int l)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long unwritten;
+	char *vaddr_atomic;
+
+	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
+#if WATCH_PWRITE
+	DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
+		 i, o, l, pfn, vaddr_atomic);
+#endif
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
+	kunmap_atomic(vaddr_atomic, KM_USER0);
+	return !unwritten;
+#else
+	return 0;
+#endif
+}
+
 static int
 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_i915_gem_pwrite *args,
@@ -180,12 +210,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	char __iomem *vaddr;
-	char *vaddr_atomic;
-	int i, o, l;
 	int ret = 0;
-	unsigned long pfn;
-	unsigned long unwritten;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -209,6 +234,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
+		unsigned long pfn;
+		int i, o, l;
+
 		/* Operation in this page
 		 *
 		 * i = page number
@@ -223,25 +251,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-#ifdef CONFIG_HIGHMEM
-		/* This is a workaround for the low performance of iounmap
-		 * (approximate 10% cpu cost on normal 3D workloads).
-		 * kmap_atomic on HIGHMEM kernels happens to let us map card
-		 * memory without taking IPIs.  When the vmap rework lands
-		 * we should be able to dump this hack.
-		 */
-		vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-		DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-			 i, o, l, pfn, vaddr_atomic);
-#endif
-		unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-							      user_data, l);
-		kunmap_atomic(vaddr_atomic, KM_USER0);
+		if (!fast_user_write(pfn, user_data, l)) {
+			unsigned long unwritten;
+			char __iomem *vaddr;
 
-		if (unwritten)
-#endif /* CONFIG_HIGHMEM */
-		{
 			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
 #if WATCH_PWRITE
 			DRM_INFO("pwrite slow i %d o %d l %d "
-- 
cgit v1.2.3-55-g7522


From a9b6148d25f15ddfe9d7a7f3e526fdb64e7cf7da Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 20 Oct 2008 14:23:29 -0700
Subject: USB: Fix unused label warnings in drivers/usb/host/ehci-hcd.c

This gets rid of an annoying warning in ehci-hcd.c when DEBUG isn't
enabled:

    warning: label 'err_debug' defined but not used

by moving it inside the already-existing #ifdef DEBUG, so that it
matches the goto.  And now my regular build is warning-free again.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/host/ehci-hcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d343afacb0b0..15a803b206b8 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1111,8 +1111,8 @@ clean0:
 #ifdef DEBUG
 	debugfs_remove(ehci_debug_root);
 	ehci_debug_root = NULL;
-#endif
 err_debug:
+#endif
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 	return retval;
 }
-- 
cgit v1.2.3-55-g7522


From 5f41b8cdc6ef33b3432cee36264d628a80398362 Mon Sep 17 00:00:00 2001
From: Luck, Tony
Date: Mon, 20 Oct 2008 15:23:40 -0700
Subject: kexec: fix crash_save_vmcoreinfo_init build problem

This fixes

  kernel/kexec.c: In function 'crash_save_vmcoreinfo_init':
  kernel/kexec.c:1374: error: 'vmlist' undeclared (first use in this function)
  kernel/kexec.c:1374: error: (Each undeclared identifier is reported only once
  kernel/kexec.c:1374: error: for each function it appears in.)
  kernel/kexec.c:1410: error: invalid use of undefined type 'struct vm_struct'
  make[1]: *** [kernel/kexec.o] Error 1

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 777ac458ac99..ac0fde7b54d0 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
+#include <linux/vmalloc.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
-- 
cgit v1.2.3-55-g7522


From f6f286f33e843862c559bfea9281318c4cdec6b0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Mon, 20 Oct 2008 14:41:03 -0700
Subject: fix WARN() for PPC

powerpc doesn't use the generic WARN_ON infrastructure.  The newly
introduced WARN() as a result didn't print the message, this patch adds
the printk for this specific case.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 0f6dabd4b517..12c07c1866b2 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -41,7 +41,7 @@ extern void warn_slowpath(const char *file, const int line,
 #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
 #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...) __WARN()
+#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
 #endif
 
 #ifndef WARN_ON
-- 
cgit v1.2.3-55-g7522


From e8848a170fd432bdda176a2d568919d4bba90467 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 21 Oct 2008 00:47:45 +0200
Subject: fix CONFIG_HIGHMEM compile error in drivers/gpu/drm/i915/i915_gem.c

commit 9b7530cc329eb036cfa589930c270e85031f554c ("i915: cleanup coding
horrors in i915_gem_gtt_pwrite()")

broke the i386 build for CONFIG_HIGHMEM=y.

Caught by automatic testing http://www.tglx.de/autoqa-logs/000137-0006-0001.log

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ My bad. It's the same patch I sent out earlier, nobody noticed then either.. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 49c5a1798ac4..dc2e6fdb6ca3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -182,7 +182,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
  * happens to let us map card memory without taking IPIs.  When the vmap
  * rework lands we should be able to dump this hack.
  */
-static inline int fast_user_write(unsigned long pfn, char __user *user_data, int l)
+static inline int fast_user_write(unsigned long pfn, char __user *user_data,
+				  int l, int o)
 {
 #ifdef CONFIG_HIGHMEM
 	unsigned long unwritten;
@@ -251,7 +252,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-		if (!fast_user_write(pfn, user_data, l)) {
+		if (!fast_user_write(pfn, user_data, l, o)) {
 			unsigned long unwritten;
 			char __iomem *vaddr;
 
-- 
cgit v1.2.3-55-g7522


From 653c03168348ac7aebb969931f87ba281749d7dd Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Mon, 20 Oct 2008 16:00:08 -0700
Subject: misc: replace remaining __FUNCTION__ with __func__

__FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/kernel-hacking.tmpl | 2 +-
 arch/arm/mach-iop13xx/include/mach/time.h | 4 ++--
 arch/arm/mach-pxa/include/mach/zylonite.h | 4 ++--
 arch/powerpc/include/asm/ptrace.h         | 2 +-
 drivers/net/usb/pegasus.c                 | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e5864160..ae15d55350ec 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
     </listitem>
     <listitem>
      <para>
-      Function names as strings (__FUNCTION__).
+      Function names as strings (__func__).
      </para>
     </listitem>
     <listitem>
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7cad..d6d52527589d 100644
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
 		return 1200000000;
 	default:
 		printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
 		return 4;
 	default:
 		printk("%s: warning unknown ratio, defaulting to 2\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 2;
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731e..bf6785adccf4 100644
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
 static inline void zylonite_pxa300_init(void)
 {
 	if (cpu_is_pxa300() || cpu_is_pxa310())
-		panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+		panic("%s: PXA300/PXA310 not supported\n", __func__);
 }
 #endif
 
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
 static inline void zylonite_pxa320_init(void)
 {
 	if (cpu_is_pxa320())
-		panic("%s: PXA320 not supported\n", __FUNCTION__);
+		panic("%s: PXA320 not supported\n", __func__);
 }
 #endif
 
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 734e0754fb9b..280a90cc9894 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
 #define CHECK_FULL_REGS(regs)						      \
 do {									      \
 	if ((regs)->trap & 1)						      \
-		printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \
+		printk(KERN_CRIT "%s: partial register set\n", __func__); \
 } while (0)
 #endif /* __powerpc64__ */
 
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 38b90e7a7ed3..7914867110ed 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -168,7 +168,7 @@ static int get_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 			netif_device_detach(pegasus->net);
 		if (netif_msg_drv(pegasus) && printk_ratelimit())
 			dev_err(&pegasus->intf->dev, "%s, status %d\n",
-					__FUNCTION__, ret);
+					__func__, ret);
 		goto out;
 	}
 
@@ -192,7 +192,7 @@ static int set_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
 	if (!buffer) {
 		if (netif_msg_drv(pegasus))
 			dev_warn(&pegasus->intf->dev, "out of memory in %s\n",
-					__FUNCTION__);
+					__func__);
 		return -ENOMEM;
 	}
 	memcpy(buffer, data, size);
-- 
cgit v1.2.3-55-g7522


From e798ba57e9f423dddbf1bdeb20a62bdd0593890f Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Tue, 21 Oct 2008 00:04:04 +0100
Subject: Export tiny shmem_file_setup for DRM-GEM

We're trying to keep the !CONFIG_SHMEM tiny-shmem.c (using ramfs without
swap) in synch with CONFIG_SHMEM shmem.c (and mpm is preparing patches
to combine them).  I was glad to see EXPORT_SYMBOL_GPL(shmem_file_setup)
go into shmem.c, but why not support DRM-GEM when !CONFIG_SHMEM too?
But caution says still depend on MMU, since !CONFIG_MMU is.. different.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/Kconfig | 2 +-
 mm/tiny-shmem.c         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 9097500de5f4..a8b33c2ec8d2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && SHMEM
+	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index 8d7a27a6335c..3e67d575ee6e 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -95,6 +95,7 @@ put_dentry:
 put_memory:
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
-- 
cgit v1.2.3-55-g7522


From a50c22eed593f474e75f693381e4d42e81762de8 Mon Sep 17 00:00:00 2001
From: Huang Weiyi
Date: Tue, 21 Oct 2008 06:43:33 +0800
Subject: mm: remove duplicated #include's

Removed duplicated #include <linux/vmalloc.h> in mm/vmalloc.c and
"internal.h" in mm/memory.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c  | 2 --
 mm/vmalloc.c | 1 -
 2 files changed, 3 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 3a6c4a658325..164951c47305 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,8 +64,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0797589d51f8..65ae576030da 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -17,7 +17,6 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugobjects.h>
-#include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
-- 
cgit v1.2.3-55-g7522


From 2515ddc6db8eb49a79f0fe5e67ff09ac7c81eab4 Mon Sep 17 00:00:00 2001
From: Paul Mundt
Date: Tue, 21 Oct 2008 12:07:40 +0900
Subject: binfmt_elf_fdpic: Update for cputime changes.

Commit f06febc96ba8e0af80bcc3eaec0a109e88275fac ("timers: fix itimer/
many thread hang") introduced a new task_cputime interface and
subsequently only converted binfmt_elf over to it.  This results in the
build for binfmt_elf_fdpic blowing up given that p->signal->{u,s}time
have disappeared from underneath us.

Apply the same trivial fix from binfmt_elf to binfmt_elf_fdpic.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0e8367c54624..5b5424cb3391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
-- 
cgit v1.2.3-55-g7522