33 files changed, 1700 insertions, 1359 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 64dc1ad59801..1445ca6257df 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -75,57 +75,25 @@ config MEMORY_SIZE
 config 29BIT
 	def_bool !32BIT
 	depends on SUPERH32
+	select UNCACHED_MAPPING
 
 config 32BIT
 	bool
 	default y if CPU_SH5
 
-config PMB_ENABLE
-	bool "Support 32-bit physical addressing through PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
-	select 32BIT
-	default y
-	help
-	  If you say Y here, physical addressing will be extended to
-	  32-bits through the SH-4A PMB. If this is not set, legacy
-	  29-bit physical addressing will be used.
-
-choice
-	prompt "PMB handling type"
-	depends on PMB_ENABLE
-	default PMB_FIXED
-
 config PMB
-	bool "PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
+	bool "Support 32-bit physical addressing through PMB"
+	depends on MMU && EXPERIMENTAL && CPU_SH4A && !CPU_SH4AL_DSP
 	select 32BIT
+	select UNCACHED_MAPPING
 	help
 	  If you say Y here, physical addressing will be extended to
 	  32-bits through the SH-4A PMB. If this is not set, legacy
 	  29-bit physical addressing will be used.
 
-config PMB_FIXED
-	bool "fixed PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || \
-					   CPU_SUBTYPE_SH7780 || \
-					   CPU_SUBTYPE_SH7785)
-	select 32BIT
-	help
-	  If this option is enabled, fixed PMB mappings are inherited
-	  from the boot loader, and the kernel does not attempt dynamic
-	  management. This is the closest to legacy 29-bit physical mode,
-	  and allows systems to support up to 512MiB of system memory.
-
-endchoice
-
 config X2TLB
-	bool "Enable extended TLB mode"
-	depends on (CPU_SHX2 || CPU_SHX3) && MMU && EXPERIMENTAL
-	help
-	  Selecting this option will enable the extended mode of the SH-X2
-	  TLB. For legacy SH-X behaviour and interoperability, say N. For
-	  all of the fun new features and a willingless to submit bug reports,
-	  say Y.
+	def_bool y
+	depends on (CPU_SHX2 || CPU_SHX3) && MMU
 
 config VSYSCALL
 	bool "Support vsyscall page"
@@ -193,14 +161,19 @@ config ARCH_MEMORY_PROBE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
+config IOREMAP_FIXED
+       def_bool y
+       depends on X2TLB || SUPERH64
+
+config UNCACHED_MAPPING
+	bool
+
 choice
 	prompt "Kernel page size"
-	default PAGE_SIZE_8KB if X2TLB
 	default PAGE_SIZE_4KB
 
 config PAGE_SIZE_4KB
 	bool "4kB"
-	depends on !MMU || !X2TLB
 	help
 	  This is the default page size used by all SuperH CPUs.
 
@@ -227,7 +200,7 @@ endchoice
 
 choice
 	prompt "HugeTLB page size"
-	depends on HUGETLB_PAGE && (CPU_SH4 || CPU_SH5) && MMU
+	depends on HUGETLB_PAGE
 	default HUGETLB_PAGE_SIZE_1MB if PAGE_SIZE_64KB
 	default HUGETLB_PAGE_SIZE_64K
 
@@ -258,6 +231,15 @@ endchoice
 
 source "mm/Kconfig"
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 endmenu
 
 menu "Cache configuration"
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 3759bf853293..3dc8a8a63822 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the Linux SuperH-specific parts of the memory manager.
 #
 
-obj-y			:= cache.o init.o consistent.o mmap.o
+obj-y			:= alignment.o cache.o init.o consistent.o mmap.o
 
 cacheops-$(CONFIG_CPU_SH2)		:= cache-sh2.o
 cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
@@ -15,7 +15,7 @@ obj-y			+= $(cacheops-y)
 
 mmu-y			:= nommu.o extable_32.o
 mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault_$(BITS).o \
-			   ioremap_$(BITS).o kmap.o tlbflush_$(BITS).o
+			   ioremap.o kmap.o pgtable.o tlbflush_$(BITS).o
 
 obj-y			+= $(mmu-y)
 obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
@@ -26,16 +26,17 @@ endif
 
 ifdef CONFIG_MMU
 tlb-$(CONFIG_CPU_SH3)		:= tlb-sh3.o
-tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o
+tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o tlb-urb.o
 tlb-$(CONFIG_CPU_SH5)		:= tlb-sh5.o
-tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o
+tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o tlb-urb.o
 obj-y				+= $(tlb-y)
 endif
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PMB)		+= pmb.o
-obj-$(CONFIG_PMB_FIXED)		+= pmb-fixed.o
 obj-$(CONFIG_NUMA)		+= numa.o
+obj-$(CONFIG_IOREMAP_FIXED)	+= ioremap_fixed.o
+obj-$(CONFIG_UNCACHED_MAPPING)	+= uncached.o
 
 # Special flags for fault_64.o.  This puts restrictions on the number of
 # caller-save registers that the compiler can target when building this file.
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
new file mode 100644
index 000000000000..b2595b8548ee
--- /dev/null
+++ b/arch/sh/mm/alignment.c
@@ -0,0 +1,189 @@
+/*
+ * Alignment access counters and corresponding user-space interfaces.
+ *
+ * Copyright (C) 2009 ST Microelectronics
+ * Copyright (C) 2009 - 2010 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include <asm/alignment.h>
+#include <asm/processor.h>
+
+static unsigned long se_user;
+static unsigned long se_sys;
+static unsigned long se_half;
+static unsigned long se_word;
+static unsigned long se_dword;
+static unsigned long se_multi;
+/* bitfield: 1: warn 2: fixup 4: signal -> combinations 2|4 && 1|2|4 are not
+   valid! */
+static int se_usermode = UM_WARN | UM_FIXUP;
+/* 0: no warning 1: print a warning message, disabled by default */
+static int se_kernmode_warn;
+
+core_param(alignment, se_usermode, int, 0600);
+
+void inc_unaligned_byte_access(void)
+{
+	se_half++;
+}
+
+void inc_unaligned_word_access(void)
+{
+	se_word++;
+}
+
+void inc_unaligned_dword_access(void)
+{
+	se_dword++;
+}
+
+void inc_unaligned_multi_access(void)
+{
+	se_multi++;
+}
+
+void inc_unaligned_user_access(void)
+{
+	se_user++;
+}
+
+void inc_unaligned_kernel_access(void)
+{
+	se_sys++;
+}
+
+/*
+ * This defaults to the global policy which can be set from the command
+ * line, while processes can overload their preferences via prctl().
+ */
+unsigned int unaligned_user_action(void)
+{
+	unsigned int action = se_usermode;
+
+	if (current->thread.flags & SH_THREAD_UAC_SIGBUS) {
+		action &= ~UM_FIXUP;
+		action |= UM_SIGNAL;
+	}
+
+	if (current->thread.flags & SH_THREAD_UAC_NOPRINT)
+		action &= ~UM_WARN;
+
+	return action;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long addr)
+{
+	return put_user(tsk->thread.flags & SH_THREAD_UAC_MASK,
+			(unsigned int __user *)addr);
+}
+
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+	tsk->thread.flags = (tsk->thread.flags & ~SH_THREAD_UAC_MASK) |
+			    (val & SH_THREAD_UAC_MASK);
+	return 0;
+}
+
+void unaligned_fixups_notify(struct task_struct *tsk, insn_size_t insn,
+			     struct pt_regs *regs)
+{
+	if (user_mode(regs) && (se_usermode & UM_WARN) && printk_ratelimit())
+		pr_notice("Fixing up unaligned userspace access "
+			  "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
+			  tsk->comm, task_pid_nr(tsk),
+			  (void *)instruction_pointer(regs), insn);
+	else if (se_kernmode_warn && printk_ratelimit())
+		pr_notice("Fixing up unaligned kernel access "
+			  "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
+			  tsk->comm, task_pid_nr(tsk),
+			  (void *)instruction_pointer(regs), insn);
+}
+
+static const char *se_usermode_action[] = {
+	"ignored",
+	"warn",
+	"fixup",
+	"fixup+warn",
+	"signal",
+	"signal+warn"
+};
+
+static int alignment_proc_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "User:\t\t%lu\n", se_user);
+	seq_printf(m, "System:\t\t%lu\n", se_sys);
+	seq_printf(m, "Half:\t\t%lu\n", se_half);
+	seq_printf(m, "Word:\t\t%lu\n", se_word);
+	seq_printf(m, "DWord:\t\t%lu\n", se_dword);
+	seq_printf(m, "Multi:\t\t%lu\n", se_multi);
+	seq_printf(m, "User faults:\t%i (%s)\n", se_usermode,
+			se_usermode_action[se_usermode]);
+	seq_printf(m, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn,
+			se_kernmode_warn ? "+warn" : "");
+	return 0;
+}
+
+static int alignment_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, alignment_proc_show, NULL);
+}
+
+static ssize_t alignment_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	int *data = PDE(file->f_path.dentry->d_inode)->data;
+	char mode;
+
+	if (count > 0) {
+		if (get_user(mode, buffer))
+			return -EFAULT;
+		if (mode >= '0' && mode <= '5')
+			*data = mode - '0';
+	}
+	return count;
+}
+
+static const struct file_operations alignment_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= alignment_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= alignment_proc_write,
+};
+
+/*
+ * This needs to be done after sysctl_init, otherwise sys/ will be
+ * overwritten.  Actually, this shouldn't be in sys/ at all since
+ * it isn't a sysctl, and it doesn't contain sysctl information.
+ * We now locate it in /proc/cpu/alignment instead.
+ */
+static int __init alignment_init(void)
+{
+	struct proc_dir_entry *dir, *res;
+
+	dir = proc_mkdir("cpu", NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir,
+			       &alignment_proc_fops, &se_usermode);
+	if (!res)
+		return -ENOMEM;
+
+        res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir,
+			       &alignment_proc_fops, &se_kernmode_warn);
+        if (!res)
+                return -ENOMEM;
+
+	return 0;
+}
+fs_initcall(alignment_init);
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 5ba067b26591..690ed010d002 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -22,8 +22,7 @@ enum cache_type {
 	CACHE_TYPE_UNIFIED,
 };
 
-static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
-						  void *iter)
+static int cache_seq_show(struct seq_file *file, void *iter)
 {
 	unsigned int cache_type = (unsigned int)file->private;
 	struct cache_info *cache;
@@ -37,7 +36,7 @@ static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
 	 */
 	jump_to_uncached();
 
-	ccr = ctrl_inl(CCR);
+	ccr = __raw_readl(CCR);
 	if ((ccr & CCR_CACHE_ENABLE) == 0) {
 		back_to_cached();
 
@@ -90,7 +89,7 @@ static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
 		for (addr = addrstart, line = 0;
 		     addr < addrstart + waysize;
 		     addr += cache->linesz, line++) {
-			unsigned long data = ctrl_inl(addr);
+			unsigned long data = __raw_readl(addr);
 
 			/* Check the V bit, ignore invalid cachelines */
 			if ((data & 1) == 0)
diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c
index 699a71f46327..defcf719f2e8 100644
--- a/arch/sh/mm/cache-sh2.c
+++ b/arch/sh/mm/cache-sh2.c
@@ -28,10 +28,10 @@ static void sh2__flush_wback_region(void *start, int size)
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0);
 		int way;
 		for (way = 0; way < 4; way++) {
-			unsigned long data =  ctrl_inl(addr | (way << 12));
+			unsigned long data =  __raw_readl(addr | (way << 12));
 			if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
 				data &= ~SH_CACHE_UPDATED;
-				ctrl_outl(data, addr | (way << 12));
+				__raw_writel(data, addr | (way << 12));
 			}
 		}
 	}
@@ -47,7 +47,7 @@ static void sh2__flush_purge_region(void *start, int size)
 		& ~(L1_CACHE_BYTES-1);
 
 	for (v = begin; v < end; v+=L1_CACHE_BYTES)
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
 }
 
@@ -63,9 +63,9 @@ static void sh2__flush_invalidate_region(void *start, int size)
 	local_irq_save(flags);
 	jump_to_uncached();
 
-	ccr = ctrl_inl(CCR);
+	ccr = __raw_readl(CCR);
 	ccr |= CCR_CACHE_INVALIDATE;
-	ctrl_outl(ccr, CCR);
+	__raw_writel(ccr, CCR);
 
 	back_to_cached();
 	local_irq_restore(flags);
@@ -78,7 +78,7 @@ static void sh2__flush_invalidate_region(void *start, int size)
 		& ~(L1_CACHE_BYTES-1);
 
 	for (v = begin; v < end; v+=L1_CACHE_BYTES)
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
 #endif
 }
diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c
index 975899d83564..1f51225426a2 100644
--- a/arch/sh/mm/cache-sh2a.c
+++ b/arch/sh/mm/cache-sh2a.c
@@ -32,10 +32,10 @@ static void sh2a__flush_wback_region(void *start, int size)
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0);
 		int way;
 		for (way = 0; way < 4; way++) {
-			unsigned long data =  ctrl_inl(addr | (way << 11));
+			unsigned long data =  __raw_readl(addr | (way << 11));
 			if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
 				data &= ~SH_CACHE_UPDATED;
-				ctrl_outl(data, addr | (way << 11));
+				__raw_writel(data, addr | (way << 11));
 			}
 		}
 	}
@@ -58,7 +58,7 @@ static void sh2a__flush_purge_region(void *start, int size)
 	jump_to_uncached();
 
 	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
 	}
 	back_to_cached();
@@ -78,17 +78,17 @@ static void sh2a__flush_invalidate_region(void *start, int size)
 	jump_to_uncached();
 
 #ifdef CONFIG_CACHE_WRITEBACK
-	ctrl_outl(ctrl_inl(CCR) | CCR_OCACHE_INVALIDATE, CCR);
+	__raw_writel(__raw_readl(CCR) | CCR_OCACHE_INVALIDATE, CCR);
 	/* I-cache invalidate */
 	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
 	}
 #else
 	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
-		ctrl_outl((v & CACHE_PHYSADDR_MASK),
+		__raw_writel((v & CACHE_PHYSADDR_MASK),
 			  CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
 	}
 #endif
@@ -115,14 +115,14 @@ static void sh2a_flush_icache_range(void *args)
 		int way;
 		/* O-Cache writeback */
 		for (way = 0; way < 4; way++) {
-			unsigned long data =  ctrl_inl(CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
+			unsigned long data =  __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
 			if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
 				data &= ~SH_CACHE_UPDATED;
-				ctrl_outl(data, CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
+				__raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
 			}
 		}
 		/* I-Cache invalidate */
-		ctrl_outl(addr,
+		__raw_writel(addr,
 			  CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008);
 	}
 
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index faef80c98134..e37523f65195 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -50,12 +50,12 @@ static void sh3__flush_wback_region(void *start, int size)
 			p = __pa(v);
 			addr = addrstart | (v & current_cpu_data.dcache.entry_mask);
 			local_irq_save(flags);
-			data = ctrl_inl(addr);
+			data = __raw_readl(addr);
 
 			if ((data & CACHE_PHYSADDR_MASK) ==
 			    (p & CACHE_PHYSADDR_MASK)) {
 				data &= ~SH_CACHE_UPDATED;
-				ctrl_outl(data, addr);
+				__raw_writel(data, addr);
 				local_irq_restore(flags);
 				break;
 			}
@@ -86,7 +86,7 @@ static void sh3__flush_purge_region(void *start, int size)
 		data = (v & 0xfffffc00); /* _Virtual_ address, ~U, ~V */
 		addr = CACHE_OC_ADDRESS_ARRAY |
 			(v & current_cpu_data.dcache.entry_mask) | SH_CACHE_ASSOC;
-		ctrl_outl(data, addr);
+		__raw_writel(data, addr);
 	}
 }
 
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index b2453bbef4cd..2cfae81914aa 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2001 - 2007  Paul Mundt
+ * Copyright (C) 2001 - 2009  Paul Mundt
  * Copyright (C) 2003  Richard Curnow
  * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
  *
@@ -15,6 +15,8 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/fs.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -23,21 +25,12 @@
  * flushing. Anything exceeding this will simply flush the dcache in its
  * entirety.
  */
-#define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
 #define MAX_ICACHE_PAGES	32
 
-static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+static void __flush_cache_one(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset);
 
 /*
- * This is initialised here to ensure that it is not placed in the BSS.  If
- * that were to happen, note that cache_init gets called before the BSS is
- * cleared, so this would get nulled out which would be hopeless.
- */
-static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
-	(void (*)(unsigned long, unsigned long))0xdeadbeef;
-
-/*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
@@ -72,6 +65,7 @@ static void sh4_flush_icache_range(void *args)
 
 	for (v = start; v < end; v += L1_CACHE_BYTES) {
 		unsigned long icacheaddr;
+		int j, n;
 
 		__ocbwb(v);
 
@@ -79,8 +73,10 @@ static void sh4_flush_icache_range(void *args)
 				cpu_data->icache.entry_mask);
 
 		/* Clear i-cache line valid-bit */
+		n = boot_cpu_data.icache.n_aliases;
 		for (i = 0; i < cpu_data->icache.ways; i++) {
-			__raw_writel(0, icacheaddr);
+			for (j = 0; j < n; j++)
+				__raw_writel(0, icacheaddr + (j * PAGE_SIZE));
 			icacheaddr += cpu_data->icache.way_incr;
 		}
 	}
@@ -89,22 +85,20 @@ static void sh4_flush_icache_range(void *args)
 	local_irq_restore(flags);
 }
 
-static inline void flush_cache_4096(unsigned long start,
-				    unsigned long phys)
+static inline void flush_cache_one(unsigned long start, unsigned long phys)
 {
 	unsigned long flags, exec_offset = 0;
 
 	/*
-	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
-	 * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
+	 * All types of SH-4 require PC to be uncached to operate on the I-cache.
+	 * Some types of SH-4 require PC to be uncached to operate on the D-cache.
 	 */
 	if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
 	    (start < CACHE_OC_ADDRESS_ARRAY))
-		exec_offset = 0x20000000;
+		exec_offset = cached_to_uncached;
 
 	local_irq_save(flags);
-	__flush_cache_4096(start | SH_CACHE_ASSOC,
-			   P1SEGADDR(phys), exec_offset);
+	__flush_cache_one(start, phys, exec_offset);
 	local_irq_restore(flags);
 }
 
@@ -115,6 +109,7 @@ static inline void flush_cache_4096(unsigned long start,
 static void sh4_flush_dcache_page(void *arg)
 {
 	struct page *page = arg;
+	unsigned long addr = (unsigned long)page_address(page);
 #ifndef CONFIG_SMP
 	struct address_space *mapping = page_mapping(page);
 
@@ -122,22 +117,14 @@ static void sh4_flush_dcache_page(void *arg)
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
 #endif
-	{
-		unsigned long phys = PHYSADDR(page_address(page));
-		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
-		int i, n;
-
-		/* Loop all the D-cache */
-		n = boot_cpu_data.dcache.n_aliases;
-		for (i = 0; i < n; i++, addr += 4096)
-			flush_cache_4096(addr, phys);
-	}
+		flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+				(addr & shm_align_mask), page_to_phys(page));
 
 	wmb();
 }
 
 /* TODO: Selective icache invalidation through IC address array.. */
-static void __uses_jump_to_uncached flush_icache_all(void)
+static void flush_icache_all(void)
 {
 	unsigned long flags, ccr;
 
@@ -145,9 +132,9 @@ static void __uses_jump_to_uncached flush_icache_all(void)
 	jump_to_uncached();
 
 	/* Flush I-cache */
-	ccr = ctrl_inl(CCR);
+	ccr = __raw_readl(CCR);
 	ccr |= CCR_CACHE_ICI;
-	ctrl_outl(ccr, CCR);
+	__raw_writel(ccr, CCR);
 
 	/*
 	 * back_to_cached() will take care of the barrier for us, don't add
@@ -158,10 +145,27 @@ static void __uses_jump_to_uncached flush_icache_all(void)
 	local_irq_restore(flags);
 }
 
-static inline void flush_dcache_all(void)
+static void flush_dcache_all(void)
 {
-	(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
-	wmb();
+	unsigned long addr, end_addr, entry_offset;
+
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(current_cpu_data.dcache.sets <<
+		 current_cpu_data.dcache.entry_shift) *
+			current_cpu_data.dcache.ways;
+
+	entry_offset = 1 << current_cpu_data.dcache.entry_shift;
+
+	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+	}
 }
 
 static void sh4_flush_cache_all(void *unused)
@@ -170,89 +174,13 @@ static void sh4_flush_cache_all(void *unused)
 	flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-			     unsigned long end)
-{
-	unsigned long d = 0, p = start & PAGE_MASK;
-	unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-	unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-	unsigned long select_bit;
-	unsigned long all_aliases_mask;
-	unsigned long addr_offset;
-	pgd_t *dir;
-	pmd_t *pmd;
-	pud_t *pud;
-	pte_t *pte;
-	int i;
-
-	dir = pgd_offset(mm, p);
-	pud = pud_offset(dir, p);
-	pmd = pmd_offset(pud, p);
-	end = PAGE_ALIGN(end);
-
-	all_aliases_mask = (1 << n_aliases) - 1;
-
-	do {
-		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-			p &= PMD_MASK;
-			p += PMD_SIZE;
-			pmd++;
-
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, p);
-
-		do {
-			unsigned long phys;
-			pte_t entry = *pte;
-
-			if (!(pte_val(entry) & _PAGE_PRESENT)) {
-				pte++;
-				p += PAGE_SIZE;
-				continue;
-			}
-
-			phys = pte_val(entry) & PTE_PHYS_MASK;
-
-			if ((p ^ phys) & alias_mask) {
-				d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-				d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-				if (d == all_aliases_mask)
-					goto loop_exit;
-			}
-
-			pte++;
-			p += PAGE_SIZE;
-		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-		pmd++;
-	} while (p < end);
-
-loop_exit:
-	addr_offset = 0;
-	select_bit = 1;
-
-	for (i = 0; i < n_aliases; i++) {
-		if (d & select_bit) {
-			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-			wmb();
-		}
-
-		select_bit <<= 1;
-		addr_offset += PAGE_SIZE;
-	}
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag).  It's no different here.  So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that.  We
- * should try that.
+ * address (i.e. tag).  It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -263,33 +191,7 @@ static void sh4_flush_cache_mm(void *arg)
 	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
 		return;
 
-	/*
-	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
-	 * the cache is physically tagged, the data can just be left in there.
-	 */
-	if (boot_cpu_data.dcache.n_aliases == 0)
-		return;
-
-	/*
-	 * Don't bother groveling around the dcache for the VMA ranges
-	 * if there are too many PTEs to make it worthwhile.
-	 */
-	if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else {
-		struct vm_area_struct *vma;
-
-		/*
-		 * In this case there are reasonably sized ranges to flush,
-		 * iterate through the VMA list and take care of any aliases.
-		 */
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-	}
-
-	/* Only touch the icache if one of the VMAs has VM_EXEC set. */
-	if (mm->exec_vm)
-		flush_icache_all();
+	flush_dcache_all();
 }
 
 /*
@@ -302,44 +204,62 @@ static void sh4_flush_cache_page(void *args)
 {
 	struct flusher_data *data = args;
 	struct vm_area_struct *vma;
+	struct page *page;
 	unsigned long address, pfn, phys;
-	unsigned int alias_mask;
+	int map_coherent = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	void *vaddr;
 
 	vma = data->vma;
-	address = data->addr1;
+	address = data->addr1 & PAGE_MASK;
 	pfn = data->addr2;
 	phys = pfn << PAGE_SHIFT;
+	page = pfn_to_page(pfn);
 
 	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
 		return;
 
-	alias_mask = boot_cpu_data.dcache.alias_mask;
-
-	/* We only need to flush D-cache when we have alias */
-	if ((address^phys) & alias_mask) {
-		/* Loop 4K of the D-cache */
-		flush_cache_4096(
-			CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
-		/* Loop another 4K of the D-cache */
-		flush_cache_4096(
-			CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
-			phys);
-	}
+	pgd = pgd_offset(vma->vm_mm, address);
+	pud = pud_offset(pgd, address);
+	pmd = pmd_offset(pud, address);
+	pte = pte_offset_kernel(pmd, address);
+
+	/* If the page isn't present, there is nothing to do here. */
+	if (!(pte_val(*pte) & _PAGE_PRESENT))
+		return;
 
-	alias_mask = boot_cpu_data.icache.alias_mask;
-	if (vma->vm_flags & VM_EXEC) {
+	if ((vma->vm_mm == current->active_mm))
+		vaddr = NULL;
+	else {
 		/*
-		 * Evict entries from the portion of the cache from which code
-		 * may have been executed at this address (virtual).  There's
-		 * no need to evict from the portion corresponding to the
-		 * physical address as for the D-cache, because we know the
-		 * kernel has never executed the code through its identity
-		 * translation.
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
 		 */
-		flush_cache_4096(
-			CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
+		map_coherent = (current_cpu_data.dcache.n_aliases &&
+			!test_bit(PG_dcache_dirty, &page->flags) &&
+			page_mapped(page));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, address);
+		else
+			vaddr = kmap_atomic(page, KM_USER0);
+
+		address = (unsigned long)vaddr;
+	}
+
+	flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+			(address & shm_align_mask), phys);
+
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_all();
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent(vaddr);
+		else
+			kunmap_atomic(vaddr, KM_USER0);
 	}
 }
 
@@ -372,28 +292,14 @@ static void sh4_flush_cache_range(void *args)
 	if (boot_cpu_data.dcache.n_aliases == 0)
 		return;
 
-	/*
-	 * Don't bother with the lookup and alias check if we have a
-	 * wide range to cover, just blow away the dcache in its
-	 * entirety instead. -- PFM.
-	 */
-	if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else
-		__flush_cache_mm(vma->vm_mm, start, end);
+	flush_dcache_all();
 
-	if (vma->vm_flags & VM_EXEC) {
-		/*
-		 * TODO: Is this required???  Need to look at how I-cache
-		 * coherency is assured when new programs are loaded to see if
-		 * this matters.
-		 */
+	if (vma->vm_flags & VM_EXEC)
 		flush_icache_all();
-	}
 }
 
 /**
- * __flush_cache_4096
+ * __flush_cache_one
  *
  * @addr:  address in memory mapped cache array
  * @phys:  P1 address to flush (has to match tags if addr has 'A' bit
@@ -406,7 +312,7 @@ static void sh4_flush_cache_range(void *args)
  * operation (purge/write-back) is selected by the lower 2 bits of
  * 'phys'.
  */
-static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+static void __flush_cache_one(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset)
 {
 	int way_count;
@@ -463,245 +369,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
 	} while (--way_count != 0);
 }
 
-/*
- * Break the 1, 2 and 4 way variants of this out into separate functions to
- * avoid nearly all the overhead of having the conditional stuff in the function
- * bodies (+ the 1 and 2 way cases avoid saving any registers too).
- *
- * We want to eliminate unnecessary bus transactions, so this code uses
- * a non-obvious technique.
- *
- * Loop over a cache way sized block of, one cache line at a time. For each
- * line, use movca.a to cause the current cache line contents to be written
- * back, but without reading anything from main memory. However this has the
- * side effect that the cache is now caching that memory location. So follow
- * this with a cache invalidate to mark the cache line invalid. And do all
- * this with interrupts disabled, to avoid the cache line being accidently
- * evicted while it is holding garbage.
- *
- * This also breaks in a number of circumstances:
- * - if there are modifications to the region of memory just above
- *   empty_zero_page (for example because a breakpoint has been placed
- *   there), then these can be lost.
- *
- *   This is because the the memory address which the cache temporarily
- *   caches in the above description is empty_zero_page. So the
- *   movca.l hits the cache (it is assumed that it misses, or at least
- *   isn't dirty), modifies the line and then invalidates it, losing the
- *   required change.
- *
- * - If caches are disabled or configured in write-through mode, then
- *   the movca.l writes garbage directly into memory.
- */
-static void __flush_dcache_segment_writethrough(unsigned long start,
-					        unsigned long extent_per_way)
-{
-	unsigned long addr;
-	int i;
-
-	addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);
-
-	while (extent_per_way) {
-		for (i = 0; i < cpu_data->dcache.ways; i++)
-			__raw_writel(0, addr + cpu_data->dcache.way_incr * i);
-
-		addr += cpu_data->dcache.linesz;
-		extent_per_way -= cpu_data->dcache.linesz;
-	}
-}
-
-static void __flush_dcache_segment_1way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/*
-	 * The previous code aligned base_addr to 16k, i.e. the way_size of all
-	 * existing SH-4 D-caches.  Whilst I don't see a need to have this
-	 * aligned to any better than the cache line size (which it will be
-	 * anyway by construction), let's align it to at least the way_size of
-	 * any existing or conceivable SH-4 D-cache.  -- RPC
-	 */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_2way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_4way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a2, a3, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a2 = a1 + way_incr;
-	a3 = a2 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-	} while (a0 < a0e);
-}
-
 extern void __weak sh4__flush_region_init(void);
 
 /*
@@ -709,31 +376,10 @@ extern void __weak sh4__flush_region_init(void);
  */
 void __init sh4_cache_init(void)
 {
-	unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
-
 	printk("PVR=%08x CVR=%08x PRR=%08x\n",
-		ctrl_inl(CCN_PVR),
-		ctrl_inl(CCN_CVR),
-		ctrl_inl(CCN_PRR));
-
-	if (wt_enabled)
-		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
-	else {
-		switch (boot_cpu_data.dcache.ways) {
-		case 1:
-			__flush_dcache_segment_fn = __flush_dcache_segment_1way;
-			break;
-		case 2:
-			__flush_dcache_segment_fn = __flush_dcache_segment_2way;
-			break;
-		case 4:
-			__flush_dcache_segment_fn = __flush_dcache_segment_4way;
-			break;
-		default:
-			panic("unknown number of cache ways\n");
-			break;
-		}
-	}
+		__raw_readl(CCN_PVR),
+		__raw_readl(CCN_CVR),
+		__raw_readl(CCN_PRR));
 
 	local_flush_icache_range	= sh4_flush_icache_range;
 	local_flush_dcache_page		= sh4_flush_dcache_page;
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 467ff8e260f7..eb4cc4ec7952 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -563,7 +563,7 @@ static void sh5_flush_cache_page(void *args)
 
 static void sh5_flush_dcache_page(void *page)
 {
-	sh64_dcache_purge_phy_page(page_to_phys(page));
+	sh64_dcache_purge_phy_page(page_to_phys((struct page *)page));
 	wmb();
 }
 
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 2cadee2037ac..f498da1cce7a 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -48,10 +48,10 @@ static inline void cache_wback_all(void)
 			unsigned long data;
 			int v = SH_CACHE_UPDATED | SH_CACHE_VALID;
 
-			data = ctrl_inl(addr);
+			data = __raw_readl(addr);
 
 			if ((data & v) == v)
-				ctrl_outl(data & ~v, addr);
+				__raw_writel(data & ~v, addr);
 
 		}
 
@@ -115,10 +115,10 @@ static void __flush_dcache_page(unsigned long phys)
 		     addr += current_cpu_data.dcache.linesz) {
 			unsigned long data;
 
-			data = ctrl_inl(addr) & (0x1ffffC00 | SH_CACHE_VALID);
+			data = __raw_readl(addr) & (0x1ffffC00 | SH_CACHE_VALID);
 		        if (data == phys) {
 				data &= ~(SH_CACHE_VALID | SH_CACHE_UPDATED);
-				ctrl_outl(data, addr);
+				__raw_writel(data, addr);
 			}
 		}
 
@@ -141,7 +141,7 @@ static void sh7705_flush_dcache_page(void *arg)
 	if (mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
-		__flush_dcache_page(PHYSADDR(page_address(page)));
+		__flush_dcache_page(__pa(page_address(page)));
 }
 
 static void sh7705_flush_cache_all(void *args)
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 35c37b7f717a..0f4095d7ac8b 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2002 - 2009  Paul Mundt
+ * Copyright (C) 2002 - 2010  Paul Mundt
  *
  * Released under the terms of the GNU GPL v2.0.
  */
@@ -27,8 +27,11 @@ void (*local_flush_icache_page)(void *args) = cache_noop;
 void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
 
 void (*__flush_wback_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_wback_region);
 void (*__flush_purge_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_purge_region);
 void (*__flush_invalidate_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_invalidate_region);
 
 static inline void noop__flush_region(void *start, int size)
 {
@@ -38,8 +41,17 @@ static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
                                    int wait)
 {
 	preempt_disable();
-	smp_call_function(func, info, wait);
+
+	/*
+	 * It's possible that this gets called early on when IRQs are
+	 * still disabled due to ioremapping by the boot CPU, so don't
+	 * even attempt IPIs unless there are other CPUs online.
+	 */
+	if (num_online_cpus() > 1)
+		smp_call_function(func, info, wait);
+
 	func(info);
+
 	preempt_enable();
 }
 
@@ -128,14 +140,10 @@ void __update_cache(struct vm_area_struct *vma,
 		return;
 
 	page = pfn_to_page(pfn);
-	if (pfn_valid(pfn) && page_mapping(page)) {
+	if (pfn_valid(pfn)) {
 		int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
-		if (dirty) {
-			unsigned long addr = (unsigned long)page_address(page);
-
-			if (pages_do_alias(addr, address & PAGE_MASK))
-				__flush_purge_region((void *)addr, PAGE_SIZE);
-		}
+		if (dirty)
+			__flush_purge_region(page_address(page), PAGE_SIZE);
 	}
 }
 
@@ -161,14 +169,21 @@ void flush_cache_all(void)
 {
 	cacheop_on_each_cpu(local_flush_cache_all, NULL, 1);
 }
+EXPORT_SYMBOL(flush_cache_all);
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
 
@@ -195,11 +210,13 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 
 	cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1);
 }
+EXPORT_SYMBOL(flush_cache_range);
 
 void flush_dcache_page(struct page *page)
 {
 	cacheop_on_each_cpu(local_flush_dcache_page, page, 1);
 }
+EXPORT_SYMBOL(flush_dcache_page);
 
 void flush_icache_range(unsigned long start, unsigned long end)
 {
@@ -265,6 +282,12 @@ static void __init emit_cache_params(void)
 
 void __init cpu_cache_init(void)
 {
+	unsigned int cache_disabled = 0;
+
+#ifdef CCR
+	cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
+#endif
+
 	compute_alias(&boot_cpu_data.icache);
 	compute_alias(&boot_cpu_data.dcache);
 	compute_alias(&boot_cpu_data.scache);
@@ -273,6 +296,13 @@ void __init cpu_cache_init(void)
 	__flush_purge_region		= noop__flush_region;
 	__flush_invalidate_region	= noop__flush_region;
 
+	/*
+	 * No flushing is necessary in the disabled cache case so we can
+	 * just keep the noop functions in local_flush_..() and __flush_..()
+	 */
+	if (unlikely(cache_disabled))
+		goto skip;
+
 	if (boot_cpu_data.family == CPU_FAMILY_SH2) {
 		extern void __weak sh2_cache_init(void);
 
@@ -312,5 +342,6 @@ void __init cpu_cache_init(void)
 		sh5_cache_init();
 	}
 
+skip:
 	emit_cache_params();
 }
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index e098ec158ddb..902967e3f841 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -15,11 +15,15 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/io.h>
+#include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/addrspace.h>
 
 #define PREALLOC_DMA_DEBUG_ENTRIES	4096
 
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
 static int __init dma_init(void)
 {
 	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
@@ -27,15 +31,12 @@ static int __init dma_init(void)
 }
 fs_initcall(dma_init);
 
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t gfp)
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret, *ret_nocache;
 	int order = get_order(size);
 
-	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
-		return ret;
-
 	ret = (void *)__get_free_pages(gfp, order);
 	if (!ret)
 		return NULL;
@@ -57,35 +58,26 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 
 	*dma_handle = virt_to_phys(ret);
 
-	debug_dma_alloc_coherent(dev, size, *dma_handle, ret_nocache);
-
 	return ret_nocache;
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+void dma_generic_free_coherent(struct device *dev, size_t size,
+			       void *vaddr, dma_addr_t dma_handle)
 {
 	int order = get_order(size);
 	unsigned long pfn = dma_handle >> PAGE_SHIFT;
 	int k;
 
-	WARN_ON(irqs_disabled());	/* for portability */
-
-	if (dma_release_from_coherent(dev, order, vaddr))
-		return;
-
-	debug_dma_free_coherent(dev, size, vaddr, dma_handle);
 	for (k = 0; k < (1 << order); k++)
 		__free_pages(pfn_to_page(pfn + k), 0);
+
 	iounmap(vaddr);
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction direction)
 {
-#ifdef CONFIG_CPU_SH5
+#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB)
 	void *p1addr = vaddr;
 #else
 	void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr);
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 47530104e0ad..8bf79e3b7bdd 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -53,6 +53,9 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pud_present(*pud_k))
 		return NULL;
 
+	if (!pud_present(*pud))
+	    set_pud(pud, *pud_k);
+
 	pmd = pmd_offset(pud, address);
 	pmd_k = pmd_offset(pud_k, address);
 	if (!pmd_present(*pmd_k))
@@ -371,7 +374,7 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
 		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
 #endif
 
-	update_mmu_cache(NULL, address, entry);
+	update_mmu_cache(NULL, address, pte);
 
 	return 0;
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 8173e38afd38..68028e8f26ce 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -15,30 +15,19 @@
 #include <linux/pagemap.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
+#include <linux/dma-mapping.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
 #include <asm/cache.h>
+#include <asm/sizes.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
-#ifdef CONFIG_SUPERH32
-/*
- * Handle trivial transitions between cached and uncached
- * segments, making use of the 1:1 mapping relationship in
- * 512MB lowmem.
- *
- * This is the offset of the uncached section from its cached alias.
- * Default value only valid in 29 bit mode, in 32bit mode will be
- * overridden in pmb_init.
- */
-unsigned long cached_to_uncached = P2SEG - P1SEG;
-#endif
-
 #ifdef CONFIG_MMU
-static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
+static pte_t *__get_pte_phys(unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -48,22 +37,30 @@ static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
 	pgd = pgd_offset_k(addr);
 	if (pgd_none(*pgd)) {
 		pgd_ERROR(*pgd);
-		return;
+		return NULL;
 	}
 
 	pud = pud_alloc(NULL, pgd, addr);
 	if (unlikely(!pud)) {
 		pud_ERROR(*pud);
-		return;
+		return NULL;
 	}
 
 	pmd = pmd_alloc(NULL, pud, addr);
 	if (unlikely(!pmd)) {
 		pmd_ERROR(*pmd);
-		return;
+		return NULL;
 	}
 
 	pte = pte_offset_kernel(pmd, addr);
+	return pte;
+}
+
+static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+	pte_t *pte;
+
+	pte = __get_pte_phys(addr);
 	if (!pte_none(*pte)) {
 		pte_ERROR(*pte);
 		return;
@@ -71,23 +68,24 @@ static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
 
 	set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
 	local_flush_tlb_one(get_asid(), addr);
+
+	if (pgprot_val(prot) & _PAGE_WIRED)
+		tlb_wire_entry(NULL, addr, *pte);
+}
+
+static void clear_pte_phys(unsigned long addr, pgprot_t prot)
+{
+	pte_t *pte;
+
+	pte = __get_pte_phys(addr);
+
+	if (pgprot_val(prot) & _PAGE_WIRED)
+		tlb_unwire_entry();
+
+	set_pte(pte, pfn_pte(0, __pgprot(0)));
+	local_flush_tlb_one(get_asid(), addr);
 }
 
-/*
- * As a performance optimization, other platforms preserve the fixmap mapping
- * across a context switch, we don't presently do this, but this could be done
- * in a similar fashion as to the wired TLB interface that sh64 uses (by way
- * of the memory mapped UTLB configuration) -- this unfortunately forces us to
- * give up a TLB entry for each mapping we want to preserve. While this may be
- * viable for a small number of fixmaps, it's not particularly useful for
- * everything and needs to be carefully evaluated. (ie, we may want this for
- * the vsyscall page).
- *
- * XXX: Perhaps add a _PAGE_WIRED flag or something similar that we can pass
- * in at __set_fixmap() time to determine the appropriate behavior to follow.
- *
- *					 -- PFM.
- */
 void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
@@ -100,6 +98,18 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 	set_pte_phys(address, phys, prot);
 }
 
+void __clear_fixmap(enum fixed_addresses idx, pgprot_t prot)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	clear_pte_phys(address, prot);
+}
+
 void __init page_table_range_init(unsigned long start, unsigned long end,
 					 pgd_t *pgd_base)
 {
@@ -119,7 +129,13 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
 		pud = (pud_t *)pgd;
 		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
+#ifdef __PAGETABLE_PMD_FOLDED
 			pmd = (pmd_t *)pud;
+#else
+			pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pud_populate(&init_mm, pud, pmd);
+			pmd += k;
+#endif
 			for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
 				if (pmd_none(*pmd)) {
 					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
@@ -181,16 +197,25 @@ void __init paging_init(void)
 	}
 
 	free_area_init_nodes(max_zone_pfns);
+}
 
-	/* Set up the uncached fixmap */
-	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
+/*
+ * Early initialization for any I/O MMUs we might have.
+ */
+static void __init iommu_init(void)
+{
+	no_iommu_init();
 }
 
+unsigned int mem_init_done = 0;
+
 void __init mem_init(void)
 {
 	int codesize, datasize, initsize;
 	int nid;
 
+	iommu_init();
+
 	num_physpages = 0;
 	high_memory = NULL;
 
@@ -220,6 +245,8 @@ void __init mem_init(void)
 	memset(empty_zero_page, 0, PAGE_SIZE);
 	__flush_wback_region(empty_zero_page, PAGE_SIZE);
 
+	vsyscall_init();
+
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
@@ -232,8 +259,48 @@ void __init mem_init(void)
 		datasize >> 10,
 		initsize >> 10);
 
-	/* Initialize the vDSO */
-	vsyscall_init();
+	printk(KERN_INFO "virtual kernel memory layout:\n"
+		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#ifdef CONFIG_HIGHMEM
+		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
+		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB) (cached)\n"
+#ifdef CONFIG_UNCACHED_MAPPING
+		"            : 0x%08lx - 0x%08lx   (%4ld MB) (uncached)\n"
+#endif
+		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
+		FIXADDR_START, FIXADDR_TOP,
+		(FIXADDR_TOP - FIXADDR_START) >> 10,
+
+#ifdef CONFIG_HIGHMEM
+		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+		(LAST_PKMAP*PAGE_SIZE) >> 10,
+#endif
+
+		(unsigned long)VMALLOC_START, VMALLOC_END,
+		(VMALLOC_END - VMALLOC_START) >> 20,
+
+		(unsigned long)memory_start, (unsigned long)high_memory,
+		((unsigned long)high_memory - (unsigned long)memory_start) >> 20,
+
+#ifdef CONFIG_UNCACHED_MAPPING
+		uncached_start, uncached_end, uncached_size >> 20,
+#endif
+
+		(unsigned long)&__init_begin, (unsigned long)&__init_end,
+		((unsigned long)&__init_end -
+		 (unsigned long)&__init_begin) >> 10,
+
+		(unsigned long)&_etext, (unsigned long)&_edata,
+		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+
+		(unsigned long)&_text, (unsigned long)&_etext,
+		((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+
+	mem_init_done = 1;
 }
 
 void free_initmem(void)
@@ -266,35 +333,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-#if THREAD_SHIFT < PAGE_SHIFT
-static struct kmem_cache *thread_info_cache;
-
-struct thread_info *alloc_thread_info(struct task_struct *tsk)
-{
-	struct thread_info *ti;
-
-	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
-	if (unlikely(ti == NULL))
-		return NULL;
-#ifdef CONFIG_DEBUG_STACK_USAGE
-	memset(ti, 0, THREAD_SIZE);
-#endif
-	return ti;
-}
-
-void free_thread_info(struct thread_info *ti)
-{
-	kmem_cache_free(thread_info_cache, ti);
-}
-
-void thread_info_cache_init(void)
-{
-	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
-					      THREAD_SIZE, 0, NULL);
-	BUG_ON(thread_info_cache == NULL);
-}
-#endif /* THREAD_SHIFT < PAGE_SHIFT */
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
@@ -323,4 +361,5 @@ int memory_add_physaddr_to_nid(u64 addr)
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
new file mode 100644
index 000000000000..1ab2385ecefe
--- /dev/null
+++ b/arch/sh/mm/ioremap.c
@@ -0,0 +1,136 @@
+/*
+ * arch/sh/mm/ioremap.c
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ * (C) Copyright 2005 - 2010  Paul Mundt
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __init_refok
+__ioremap_caller(phys_addr_t phys_addr, unsigned long size,
+		 pgprot_t pgprot, void *caller)
+{
+	struct vm_struct *area;
+	unsigned long offset, last_addr, addr, orig_addr;
+	void __iomem *mapped;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * If we can't yet use the regular approach, go the fixmap route.
+	 */
+	if (!mem_init_done)
+		return ioremap_fixed(phys_addr, size, pgprot);
+
+	/*
+	 * First try to remap through the PMB.
+	 * PMB entries are all pre-faulted.
+	 */
+	mapped = pmb_remap_caller(phys_addr, size, pgprot, caller);
+	if (mapped && !IS_ERR(mapped))
+		return mapped;
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area_caller(size, VM_IOREMAP, caller);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	orig_addr = addr = (unsigned long)area->addr;
+
+	if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+		vunmap((void *)orig_addr);
+		return NULL;
+	}
+
+	return (void __iomem *)(offset + (char *)orig_addr);
+}
+EXPORT_SYMBOL(__ioremap_caller);
+
+/*
+ * Simple checks for non-translatable mappings.
+ */
+static inline int iomapping_nontranslatable(unsigned long offset)
+{
+#ifdef CONFIG_29BIT
+	/*
+	 * In 29-bit mode this includes the fixed P1/P2 areas, as well as
+	 * parts of P3.
+	 */
+	if (PXSEG(offset) < P3SEG || offset >= P3_ADDR_MAX)
+		return 1;
+#endif
+
+	return 0;
+}
+
+void __iounmap(void __iomem *addr)
+{
+	unsigned long vaddr = (unsigned long __force)addr;
+	struct vm_struct *p;
+
+	/*
+	 * Nothing to do if there is no translatable mapping.
+	 */
+	if (iomapping_nontranslatable(vaddr))
+		return;
+
+	/*
+	 * There's no VMA if it's from an early fixed mapping.
+	 */
+	if (iounmap_fixed(addr) == 0)
+		return;
+
+	/*
+	 * If the PMB handled it, there's nothing else to do.
+	 */
+	if (pmb_unmap(addr) == 0)
+		return;
+
+	p = remove_vm_area((void *)(vaddr & PAGE_MASK));
+	if (!p) {
+		printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
+		return;
+	}
+
+	kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c
deleted file mode 100644
index c3250614e3ae..000000000000
--- a/arch/sh/mm/ioremap_32.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * arch/sh/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2005, 2006 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/addrspace.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu.h>
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
-			unsigned long flags)
-{
-	struct vm_struct * area;
-	unsigned long offset, last_addr, addr, orig_addr;
-	pgprot_t pgprot;
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
-		return NULL;
-
-	/*
-	 * If we're in the fixed PCI memory range, mapping through page
-	 * tables is not only pointless, but also fundamentally broken.
-	 * Just return the physical address instead.
-	 *
-	 * For boards that map a small PCI memory aperture somewhere in
-	 * P1/P2 space, ioremap() will already do the right thing,
-	 * and we'll never get this far.
-	 */
-	if (is_pci_memory_fixed_range(phys_addr, size))
-		return (void __iomem *)phys_addr;
-
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_vm_area(size, VM_IOREMAP);
-	if (!area)
-		return NULL;
-	area->phys_addr = phys_addr;
-	orig_addr = addr = (unsigned long)area->addr;
-
-#ifdef CONFIG_PMB
-	/*
-	 * First try to remap through the PMB once a valid VMA has been
-	 * established. Smaller allocations (or the rest of the size
-	 * remaining after a PMB mapping due to the size not being
-	 * perfectly aligned on a PMB size boundary) are then mapped
-	 * through the UTLB using conventional page tables.
-	 *
-	 * PMB entries are all pre-faulted.
-	 */
-	if (unlikely(size >= 0x1000000)) {
-		unsigned long mapped = pmb_remap(addr, phys_addr, size, flags);
-
-		if (likely(mapped)) {
-			addr		+= mapped;
-			phys_addr	+= mapped;
-			size		-= mapped;
-		}
-	}
-#endif
-
-	pgprot = __pgprot(pgprot_val(PAGE_KERNEL_NOCACHE) | flags);
-	if (likely(size))
-		if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
-			vunmap((void *)orig_addr);
-			return NULL;
-		}
-
-	return (void __iomem *)(offset + (char *)orig_addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void __iomem *addr)
-{
-	unsigned long vaddr = (unsigned long __force)addr;
-	unsigned long seg = PXSEG(vaddr);
-	struct vm_struct *p;
-
-	if (seg < P3SEG || vaddr >= P3_ADDR_MAX)
-		return;
-	if (is_pci_memory_fixed_range(vaddr, 0))
-		return;
-
-#ifdef CONFIG_PMB
-	/*
-	 * Purge any PMB entries that may have been established for this
-	 * mapping, then proceed with conventional VMA teardown.
-	 *
-	 * XXX: Note that due to the way that remove_vm_area() does
-	 * matching of the resultant VMA, we aren't able to fast-forward
-	 * the address past the PMB space until the end of the VMA where
-	 * the page tables reside. As such, unmap_vm_area() will be
-	 * forced to linearly scan over the area until it finds the page
-	 * tables where PTEs that need to be unmapped actually reside,
-	 * which is far from optimal. Perhaps we need to use a separate
-	 * VMA for the PMB mappings?
-	 *					-- PFM.
-	 */
-	pmb_unmap(vaddr);
-#endif
-
-	p = remove_vm_area((void *)(vaddr & PAGE_MASK));
-	if (!p) {
-		printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
-		return;
-	}
-
-	kfree(p);
-}
-EXPORT_SYMBOL(__iounmap);
diff --git a/arch/sh/mm/ioremap_64.c b/arch/sh/mm/ioremap_64.c
deleted file mode 100644
index b16843d02b76..000000000000
--- a/arch/sh/mm/ioremap_64.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * arch/sh/mm/ioremap_64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- * Copyright (C) 2003 - 2007  Paul Mundt
- *
- * Mostly derived from arch/sh/mm/ioremap.c which, in turn is mostly
- * derived from arch/i386/mm/ioremap.c .
- *
- *   (C) Copyright 1995 1996 Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/vmalloc.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <linux/bootmem.h>
-#include <linux/proc_fs.h>
-#include <linux/slab.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/addrspace.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu.h>
-
-static struct resource shmedia_iomap = {
-	.name	= "shmedia_iomap",
-	.start	= IOBASE_VADDR + PAGE_SIZE,
-	.end	= IOBASE_END - 1,
-};
-
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
-			      unsigned long flags);
-static void shmedia_unmapioaddr(unsigned long vaddr);
-static void __iomem *shmedia_ioremap(struct resource *res, u32 pa,
-				     int sz, unsigned long flags);
-
-/*
- * We have the same problem as the SPARC, so lets have the same comment:
- * Our mini-allocator...
- * Boy this is gross! We need it because we must map I/O for
- * timers and interrupt controller before the kmalloc is available.
- */
-
-#define XNMLN  15
-#define XNRES  10
-
-struct xresource {
-	struct resource xres;   /* Must be first */
-	int xflag;              /* 1 == used */
-	char xname[XNMLN+1];
-};
-
-static struct xresource xresv[XNRES];
-
-static struct xresource *xres_alloc(void)
-{
-	struct xresource *xrp;
-	int n;
-
-	xrp = xresv;
-	for (n = 0; n < XNRES; n++) {
-		if (xrp->xflag == 0) {
-			xrp->xflag = 1;
-			return xrp;
-		}
-		xrp++;
-	}
-	return NULL;
-}
-
-static void xres_free(struct xresource *xrp)
-{
-	xrp->xflag = 0;
-}
-
-static struct resource *shmedia_find_resource(struct resource *root,
-					      unsigned long vaddr)
-{
-	struct resource *res;
-
-	for (res = root->child; res; res = res->sibling)
-		if (res->start <= vaddr && res->end >= vaddr)
-			return res;
-
-	return NULL;
-}
-
-static void __iomem *shmedia_alloc_io(unsigned long phys, unsigned long size,
-				      const char *name, unsigned long flags)
-{
-	struct xresource *xres;
-	struct resource *res;
-	char *tack;
-	int tlen;
-
-	if (name == NULL)
-		name = "???";
-
-	xres = xres_alloc();
-	if (xres != 0) {
-		tack = xres->xname;
-		res = &xres->xres;
-	} else {
-		printk_once(KERN_NOTICE "%s: done with statics, "
-			       "switching to kmalloc\n", __func__);
-		tlen = strlen(name);
-		tack = kmalloc(sizeof(struct resource) + tlen + 1, GFP_KERNEL);
-		if (!tack)
-			return NULL;
-		memset(tack, 0, sizeof(struct resource));
-		res = (struct resource *) tack;
-		tack += sizeof(struct resource);
-	}
-
-	strncpy(tack, name, XNMLN);
-	tack[XNMLN] = 0;
-	res->name = tack;
-
-	return shmedia_ioremap(res, phys, size, flags);
-}
-
-static void __iomem *shmedia_ioremap(struct resource *res, u32 pa, int sz,
-				     unsigned long flags)
-{
-	unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
-	unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
-	unsigned long va;
-	unsigned int psz;
-
-	if (allocate_resource(&shmedia_iomap, res, round_sz,
-			      shmedia_iomap.start, shmedia_iomap.end,
-			      PAGE_SIZE, NULL, NULL) != 0) {
-		panic("alloc_io_res(%s): cannot occupy\n",
-		      (res->name != NULL) ? res->name : "???");
-	}
-
-	va = res->start;
-	pa &= PAGE_MASK;
-
-	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
-	for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
-		shmedia_mapioaddr(pa, va, flags);
-		va += PAGE_SIZE;
-		pa += PAGE_SIZE;
-	}
-
-	return (void __iomem *)(unsigned long)(res->start + offset);
-}
-
-static void shmedia_free_io(struct resource *res)
-{
-	unsigned long len = res->end - res->start + 1;
-
-	BUG_ON((len & (PAGE_SIZE - 1)) != 0);
-
-	while (len) {
-		len -= PAGE_SIZE;
-		shmedia_unmapioaddr(res->start + len);
-	}
-
-	release_resource(res);
-}
-
-static __init_refok void *sh64_get_page(void)
-{
-	void *page;
-
-	if (slab_is_available())
-		page = (void *)get_zeroed_page(GFP_KERNEL);
-	else
-		page = alloc_bootmem_pages(PAGE_SIZE);
-
-	if (!page || ((unsigned long)page & ~PAGE_MASK))
-		panic("sh64_get_page: Out of memory already?\n");
-
-	return page;
-}
-
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
-			      unsigned long flags)
-{
-	pgd_t *pgdp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-	pte_t *ptep, pte;
-	pgprot_t prot;
-
-	pr_debug("shmedia_mapiopage pa %08lx va %08lx\n",  pa, va);
-
-	if (!flags)
-		flags = 1; /* 1 = CB0-1 device */
-
-	pgdp = pgd_offset_k(va);
-	if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
-		pudp = (pud_t *)sh64_get_page();
-		set_pgd(pgdp, __pgd((unsigned long)pudp | _KERNPG_TABLE));
-	}
-
-	pudp = pud_offset(pgdp, va);
-	if (pud_none(*pudp) || !pud_present(*pudp)) {
-		pmdp = (pmd_t *)sh64_get_page();
-		set_pud(pudp, __pud((unsigned long)pmdp | _KERNPG_TABLE));
-	}
-
-	pmdp = pmd_offset(pudp, va);
-	if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
-		ptep = (pte_t *)sh64_get_page();
-		set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
-	}
-
-	prot = __pgprot(_PAGE_PRESENT | _PAGE_READ     | _PAGE_WRITE  |
-			_PAGE_DIRTY   | _PAGE_ACCESSED | _PAGE_SHARED | flags);
-
-	pte = pfn_pte(pa >> PAGE_SHIFT, prot);
-	ptep = pte_offset_kernel(pmdp, va);
-
-	if (!pte_none(*ptep) &&
-	    pte_val(*ptep) != pte_val(pte))
-		pte_ERROR(*ptep);
-
-	set_pte(ptep, pte);
-
-	flush_tlb_kernel_range(va, PAGE_SIZE);
-}
-
-static void shmedia_unmapioaddr(unsigned long vaddr)
-{
-	pgd_t *pgdp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	pgdp = pgd_offset_k(vaddr);
-	if (pgd_none(*pgdp) || pgd_bad(*pgdp))
-		return;
-
-	pudp = pud_offset(pgdp, vaddr);
-	if (pud_none(*pudp) || pud_bad(*pudp))
-		return;
-
-	pmdp = pmd_offset(pudp, vaddr);
-	if (pmd_none(*pmdp) || pmd_bad(*pmdp))
-		return;
-
-	ptep = pte_offset_kernel(pmdp, vaddr);
-
-	if (pte_none(*ptep) || !pte_present(*ptep))
-		return;
-
-	clear_page((void *)ptep);
-	pte_clear(&init_mm, vaddr, ptep);
-}
-
-void __iomem *__ioremap(unsigned long offset, unsigned long size,
-			unsigned long flags)
-{
-	char name[14];
-
-	sprintf(name, "phys_%08x", (u32)offset);
-	return shmedia_alloc_io(offset, size, name, flags);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void __iomem *virtual)
-{
-	unsigned long vaddr = (unsigned long)virtual & PAGE_MASK;
-	struct resource *res;
-	unsigned int psz;
-
-	res = shmedia_find_resource(&shmedia_iomap, vaddr);
-	if (!res) {
-		printk(KERN_ERR "%s: Failed to free 0x%08lx\n",
-		       __func__, vaddr);
-		return;
-	}
-
-	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
-	shmedia_free_io(res);
-
-	if ((char *)res >= (char *)xresv &&
-	    (char *)res <  (char *)&xresv[XNRES]) {
-		xres_free((struct xresource *)res);
-	} else {
-		kfree(res);
-	}
-}
-EXPORT_SYMBOL(__iounmap);
-
-static int
-ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
-		  void *data)
-{
-	char *p = buf, *e = buf + length;
-	struct resource *r;
-	const char *nm;
-
-	for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
-		if (p + 32 >= e)        /* Better than nothing */
-			break;
-		nm = r->name;
-		if (nm == NULL)
-			nm = "???";
-
-		p += sprintf(p, "%08lx-%08lx: %s\n",
-			     (unsigned long)r->start,
-			     (unsigned long)r->end, nm);
-	}
-
-	return p-buf;
-}
-
-static int __init register_proc_onchip(void)
-{
-	create_proc_read_entry("io_map", 0, 0, ioremap_proc_info,
-			       &shmedia_iomap);
-	return 0;
-}
-late_initcall(register_proc_onchip);
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
new file mode 100644
index 000000000000..7f682e5dafcf
--- /dev/null
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -0,0 +1,135 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * These functions should only be used when it is necessary to map a
+ * physical address space into the kernel address space before ioremap()
+ * can be used, e.g. early in boot before paging_init().
+ *
+ * Copyright (C) 2009  Matt Fleming
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+
+struct ioremap_map {
+	void __iomem *addr;
+	unsigned long size;
+	unsigned long fixmap_addr;
+};
+
+static struct ioremap_map ioremap_maps[FIX_N_IOREMAPS];
+
+void __init ioremap_fixed_init(void)
+{
+	struct ioremap_map *map;
+	int i;
+
+	for (i = 0; i < FIX_N_IOREMAPS; i++) {
+		map = &ioremap_maps[i];
+		map->fixmap_addr = __fix_to_virt(FIX_IOREMAP_BEGIN + i);
+	}
+}
+
+void __init __iomem *
+ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
+{
+	enum fixed_addresses idx0, idx;
+	struct ioremap_map *map;
+	unsigned int nrpages;
+	unsigned long offset;
+	int i, slot;
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(phys_addr + size) - phys_addr;
+
+	slot = -1;
+	for (i = 0; i < FIX_N_IOREMAPS; i++) {
+		map = &ioremap_maps[i];
+		if (!map->addr) {
+			map->size = size;
+			slot = i;
+			break;
+		}
+	}
+
+	if (slot < 0)
+		return NULL;
+
+	/*
+	 * Mappings have to fit in the FIX_IOREMAP area.
+	 */
+	nrpages = size >> PAGE_SHIFT;
+	if (nrpages > FIX_N_IOREMAPS)
+		return NULL;
+
+	/*
+	 * Ok, go for it..
+	 */
+	idx0 = FIX_IOREMAP_BEGIN + slot;
+	idx = idx0;
+	while (nrpages > 0) {
+		pgprot_val(prot) |= _PAGE_WIRED;
+		__set_fixmap(idx, phys_addr, prot);
+		phys_addr += PAGE_SIZE;
+		idx++;
+		--nrpages;
+	}
+
+	map->addr = (void __iomem *)(offset + map->fixmap_addr);
+	return map->addr;
+}
+
+int iounmap_fixed(void __iomem *addr)
+{
+	enum fixed_addresses idx;
+	struct ioremap_map *map;
+	unsigned int nrpages;
+	int i, slot;
+
+	slot = -1;
+	for (i = 0; i < FIX_N_IOREMAPS; i++) {
+		map = &ioremap_maps[i];
+		if (map->addr == addr) {
+			slot = i;
+			break;
+		}
+	}
+
+	/*
+	 * If we don't match, it's not for us.
+	 */
+	if (slot < 0)
+		return -EINVAL;
+
+	nrpages = map->size >> PAGE_SHIFT;
+
+	idx = FIX_IOREMAP_BEGIN + slot + nrpages - 1;
+	while (nrpages > 0) {
+		__clear_fixmap(idx, __pgprot(_PAGE_WIRED));
+		--idx;
+		--nrpages;
+	}
+
+	map->size = 0;
+	map->addr = NULL;
+
+	return 0;
+}
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 16e01b5fed04..15d74ea42094 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 	pagefault_disable();
 
 	idx = FIX_CMAP_END -
-		((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
+		(((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
+		 (FIX_N_COLOURS * smp_processor_id()));
+
 	vaddr = __fix_to_virt(idx);
 
 	BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index d2984fa42d3d..afeb710ec5c3 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -54,7 +54,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		/* We do not accept a shared mapping if it would violate
 		 * cache aliasing constraints.
 		 */
-		if ((flags & MAP_SHARED) && (addr & shm_align_mask))
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index ac16c05917ef..7694f50c9034 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -94,3 +94,7 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 }
+
+void pgtable_cache_init(void)
+{
+}
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 9b784fdb947c..961b34085e3b 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -28,7 +28,7 @@ void __init setup_memory(void)
 {
 	unsigned long free_pfn = PFN_UP(__pa(_end));
 	u64 base = min_low_pfn << PAGE_SHIFT;
-	u64 size = (max_low_pfn << PAGE_SHIFT) - min_low_pfn;
+	u64 size = (max_low_pfn << PAGE_SHIFT) - base;
 
 	lmb_add(base, size);
 
@@ -38,6 +38,15 @@ void __init setup_memory(void)
 		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
 
 	/*
+	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
+	 */
+	if (CONFIG_ZERO_PAGE_OFFSET != 0)
+		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
+
+	lmb_analyze();
+	lmb_dump_all();
+
+	/*
 	 * Node 0 sets up its pgdat at the first available pfn,
 	 * and bumps it up before setting up the bootmem allocator.
 	 */
@@ -60,18 +69,21 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	unsigned long bootmem_paddr;
 
 	/* Don't allow bogus node assignment */
-	BUG_ON(nid > MAX_NUMNODES || nid == 0);
+	BUG_ON(nid > MAX_NUMNODES || nid <= 0);
 
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
 
+	pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
+			 PAGE_KERNEL);
+
 	lmb_add(start, end - start);
 
 	__add_active_range(nid, start_pfn, end_pfn);
 
 	/* Node-local pgdat */
 	NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data),
-					     SMP_CACHE_BYTES, end_pfn));
+					     SMP_CACHE_BYTES, end));
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
 	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
@@ -81,7 +93,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	/* Node-local bootmap */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
 	bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT,
-				       PAGE_SIZE, end_pfn);
+				       PAGE_SIZE, end);
 	init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
 			  start_pfn, end_pfn);
 
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
new file mode 100644
index 000000000000..6f21fb1d8726
--- /dev/null
+++ b/arch/sh/mm/pgtable.c
@@ -0,0 +1,56 @@
+#include <linux/mm.h>
+
+#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO
+
+static struct kmem_cache *pgd_cachep;
+#if PAGETABLE_LEVELS > 2
+static struct kmem_cache *pmd_cachep;
+#endif
+
+void pgd_ctor(void *x)
+{
+	pgd_t *pgd = x;
+
+	memcpy(pgd + USER_PTRS_PER_PGD,
+	       swapper_pg_dir + USER_PTRS_PER_PGD,
+	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
+void pgtable_cache_init(void)
+{
+	pgd_cachep = kmem_cache_create("pgd_cache",
+				       PTRS_PER_PGD * (1<<PTE_MAGNITUDE),
+				       PAGE_SIZE, SLAB_PANIC, pgd_ctor);
+#if PAGETABLE_LEVELS > 2
+	pmd_cachep = kmem_cache_create("pmd_cache",
+				       PTRS_PER_PMD * (1<<PTE_MAGNITUDE),
+				       PAGE_SIZE, SLAB_PANIC, NULL);
+#endif
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP);
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(pgd_cachep, pgd);
+}
+
+#if PAGETABLE_LEVELS > 2
+void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	set_pud(pud, __pud((unsigned long)pmd));
+}
+
+pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP);
+}
+
+void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(pmd_cachep, pmd);
+}
+#endif /* PAGETABLE_LEVELS > 2 */
diff --git a/arch/sh/mm/pmb-fixed.c b/arch/sh/mm/pmb-fixed.c
deleted file mode 100644
index 43c8eac4d8a1..000000000000
--- a/arch/sh/mm/pmb-fixed.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * arch/sh/mm/fixed_pmb.c
- *
- * Copyright (C) 2009  Renesas Solutions Corp.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-
-static int __uses_jump_to_uncached fixed_pmb_init(void)
-{
-	int i;
-	unsigned long addr, data;
-
-	jump_to_uncached();
-
-	for (i = 0; i < PMB_ENTRY_MAX; i++) {
-		addr = PMB_DATA + (i << PMB_E_SHIFT);
-		data = ctrl_inl(addr);
-		if (!(data & PMB_V))
-			continue;
-
-		if (data & PMB_C) {
-#if defined(CONFIG_CACHE_WRITETHROUGH)
-			data |= PMB_WT;
-#elif defined(CONFIG_CACHE_WRITEBACK)
-			data &= ~PMB_WT;
-#else
-			data &= ~(PMB_C | PMB_WT);
-#endif
-		}
-		ctrl_outl(data, addr);
-	}
-
-	back_to_cached();
-
-	return 0;
-}
-arch_initcall(fixed_pmb_init);
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index b1a714a92b14..a4662e2782c3 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -3,11 +3,8 @@
  *
  * Privileged Space Mapping Buffer (PMB) Support.
  *
- * Copyright (C) 2005, 2006, 2007 Paul Mundt
- *
- * P1/P2 Section mapping definitions from map32.h, which was:
- *
- *	Copyright 2003 (c) Lineo Solutions,Inc.
+ * Copyright (C) 2005 - 2010  Paul Mundt
+ * Copyright (C) 2010  Matt Fleming
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -24,324 +21,795 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/err.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/sizes.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/page.h>
 #include <asm/mmu.h>
-#include <asm/io.h>
 #include <asm/mmu_context.h>
 
-#define NR_PMB_ENTRIES	16
-
-static struct kmem_cache *pmb_cache;
-static unsigned long pmb_map;
-
-static struct pmb_entry pmb_init_map[] = {
-	/* vpn         ppn         flags (ub/sz/c/wt) */
-
-	/* P1 Section Mappings */
-	{ 0x80000000, 0x00000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x84000000, 0x04000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, },
-	{ 0x90000000, 0x10000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x94000000, 0x14000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x98000000, 0x18000000, PMB_SZ_64M  | PMB_C, },
-
-	/* P2 Section Mappings */
-	{ 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, },
-	{ 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
+struct pmb_entry;
+
+struct pmb_entry {
+	unsigned long vpn;
+	unsigned long ppn;
+	unsigned long flags;
+	unsigned long size;
+
+	spinlock_t lock;
+
+	/*
+	 * 0 .. NR_PMB_ENTRIES for specific entry selection, or
+	 * PMB_NO_ENTRY to search for a free one
+	 */
+	int entry;
+
+	/* Adjacent entry link for contiguous multi-entry mappings */
+	struct pmb_entry *link;
 };
 
-static inline unsigned long mk_pmb_entry(unsigned int entry)
+static struct {
+	unsigned long size;
+	int flag;
+} pmb_sizes[] = {
+	{ .size	= SZ_512M, .flag = PMB_SZ_512M, },
+	{ .size = SZ_128M, .flag = PMB_SZ_128M, },
+	{ .size = SZ_64M,  .flag = PMB_SZ_64M,  },
+	{ .size = SZ_16M,  .flag = PMB_SZ_16M,  },
+};
+
+static void pmb_unmap_entry(struct pmb_entry *, int depth);
+
+static DEFINE_RWLOCK(pmb_rwlock);
+static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES];
+static DECLARE_BITMAP(pmb_map, NR_PMB_ENTRIES);
+
+static unsigned int pmb_iomapping_enabled;
+
+static __always_inline unsigned long mk_pmb_entry(unsigned int entry)
 {
 	return (entry & PMB_E_MASK) << PMB_E_SHIFT;
 }
 
-static inline unsigned long mk_pmb_addr(unsigned int entry)
+static __always_inline unsigned long mk_pmb_addr(unsigned int entry)
 {
 	return mk_pmb_entry(entry) | PMB_ADDR;
 }
 
-static inline unsigned long mk_pmb_data(unsigned int entry)
+static __always_inline unsigned long mk_pmb_data(unsigned int entry)
 {
 	return mk_pmb_entry(entry) | PMB_DATA;
 }
 
-static DEFINE_SPINLOCK(pmb_list_lock);
-static struct pmb_entry *pmb_list;
+static __always_inline unsigned int pmb_ppn_in_range(unsigned long ppn)
+{
+	return ppn >= __pa(memory_start) && ppn < __pa(memory_end);
+}
 
-static inline void pmb_list_add(struct pmb_entry *pmbe)
+/*
+ * Ensure that the PMB entries match our cache configuration.
+ *
+ * When we are in 32-bit address extended mode, CCR.CB becomes
+ * invalid, so care must be taken to manually adjust cacheable
+ * translations.
+ */
+static __always_inline unsigned long pmb_cache_flags(void)
 {
-	struct pmb_entry **p, *tmp;
+	unsigned long flags = 0;
+
+#if defined(CONFIG_CACHE_OFF)
+	flags |= PMB_WT | PMB_UB;
+#elif defined(CONFIG_CACHE_WRITETHROUGH)
+	flags |= PMB_C | PMB_WT | PMB_UB;
+#elif defined(CONFIG_CACHE_WRITEBACK)
+	flags |= PMB_C;
+#endif
 
-	p = &pmb_list;
-	while ((tmp = *p) != NULL)
-		p = &tmp->next;
+	return flags;
+}
 
-	pmbe->next = tmp;
-	*p = pmbe;
+/*
+ * Convert typical pgprot value to the PMB equivalent
+ */
+static inline unsigned long pgprot_to_pmb_flags(pgprot_t prot)
+{
+	unsigned long pmb_flags = 0;
+	u64 flags = pgprot_val(prot);
+
+	if (flags & _PAGE_CACHABLE)
+		pmb_flags |= PMB_C;
+	if (flags & _PAGE_WT)
+		pmb_flags |= PMB_WT | PMB_UB;
+
+	return pmb_flags;
+}
+
+static inline bool pmb_can_merge(struct pmb_entry *a, struct pmb_entry *b)
+{
+	return (b->vpn == (a->vpn + a->size)) &&
+	       (b->ppn == (a->ppn + a->size)) &&
+	       (b->flags == a->flags);
 }
 
-static inline void pmb_list_del(struct pmb_entry *pmbe)
+static bool pmb_mapping_exists(unsigned long vaddr, phys_addr_t phys,
+			       unsigned long size)
 {
-	struct pmb_entry **p, *tmp;
+	int i;
+
+	read_lock(&pmb_rwlock);
 
-	for (p = &pmb_list; (tmp = *p); p = &tmp->next)
-		if (tmp == pmbe) {
-			*p = tmp->next;
-			return;
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		struct pmb_entry *pmbe, *iter;
+		unsigned long span;
+
+		if (!test_bit(i, pmb_map))
+			continue;
+
+		pmbe = &pmb_entry_list[i];
+
+		/*
+		 * See if VPN and PPN are bounded by an existing mapping.
+		 */
+		if ((vaddr < pmbe->vpn) || (vaddr >= (pmbe->vpn + pmbe->size)))
+			continue;
+		if ((phys < pmbe->ppn) || (phys >= (pmbe->ppn + pmbe->size)))
+			continue;
+
+		/*
+		 * Now see if we're in range of a simple mapping.
+		 */
+		if (size <= pmbe->size) {
+			read_unlock(&pmb_rwlock);
+			return true;
 		}
+
+		span = pmbe->size;
+
+		/*
+		 * Finally for sizes that involve compound mappings, walk
+		 * the chain.
+		 */
+		for (iter = pmbe->link; iter; iter = iter->link)
+			span += iter->size;
+
+		/*
+		 * Nothing else to do if the range requirements are met.
+		 */
+		if (size <= span) {
+			read_unlock(&pmb_rwlock);
+			return true;
+		}
+	}
+
+	read_unlock(&pmb_rwlock);
+	return false;
+}
+
+static bool pmb_size_valid(unsigned long size)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+		if (pmb_sizes[i].size == size)
+			return true;
+
+	return false;
 }
 
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
-			    unsigned long flags)
+static inline bool pmb_addr_valid(unsigned long addr, unsigned long size)
+{
+	return (addr >= P1SEG && (addr + size - 1) < P3SEG);
+}
+
+static inline bool pmb_prot_valid(pgprot_t prot)
+{
+	return (pgprot_val(prot) & _PAGE_USER) == 0;
+}
+
+static int pmb_size_to_flags(unsigned long size)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+		if (pmb_sizes[i].size == size)
+			return pmb_sizes[i].flag;
+
+	return 0;
+}
+
+static int pmb_alloc_entry(void)
+{
+	int pos;
+
+	pos = find_first_zero_bit(pmb_map, NR_PMB_ENTRIES);
+	if (pos >= 0 && pos < NR_PMB_ENTRIES)
+		__set_bit(pos, pmb_map);
+	else
+		pos = -ENOSPC;
+
+	return pos;
+}
+
+static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
+				   unsigned long flags, int entry)
 {
 	struct pmb_entry *pmbe;
+	unsigned long irqflags;
+	void *ret = NULL;
+	int pos;
+
+	write_lock_irqsave(&pmb_rwlock, irqflags);
+
+	if (entry == PMB_NO_ENTRY) {
+		pos = pmb_alloc_entry();
+		if (unlikely(pos < 0)) {
+			ret = ERR_PTR(pos);
+			goto out;
+		}
+	} else {
+		if (__test_and_set_bit(entry, pmb_map)) {
+			ret = ERR_PTR(-ENOSPC);
+			goto out;
+		}
+
+		pos = entry;
+	}
+
+	write_unlock_irqrestore(&pmb_rwlock, irqflags);
+
+	pmbe = &pmb_entry_list[pos];
 
-	pmbe = kmem_cache_alloc(pmb_cache, GFP_KERNEL);
-	if (!pmbe)
-		return ERR_PTR(-ENOMEM);
+	memset(pmbe, 0, sizeof(struct pmb_entry));
+
+	spin_lock_init(&pmbe->lock);
 
 	pmbe->vpn	= vpn;
 	pmbe->ppn	= ppn;
 	pmbe->flags	= flags;
-
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_add(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	pmbe->entry	= pos;
 
 	return pmbe;
+
+out:
+	write_unlock_irqrestore(&pmb_rwlock, irqflags);
+	return ret;
 }
 
-void pmb_free(struct pmb_entry *pmbe)
+static void pmb_free(struct pmb_entry *pmbe)
 {
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_del(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	__clear_bit(pmbe->entry, pmb_map);
 
-	kmem_cache_free(pmb_cache, pmbe);
+	pmbe->entry	= PMB_NO_ENTRY;
+	pmbe->link	= NULL;
 }
 
 /*
- * Must be in P2 for __set_pmb_entry()
+ * Must be run uncached.
  */
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
-		    unsigned long flags, int *entry)
+static void __set_pmb_entry(struct pmb_entry *pmbe)
 {
-	unsigned int pos = *entry;
+	unsigned long addr, data;
 
-	if (unlikely(pos == PMB_NO_ENTRY))
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
+	addr = mk_pmb_addr(pmbe->entry);
+	data = mk_pmb_data(pmbe->entry);
 
-repeat:
-	if (unlikely(pos > NR_PMB_ENTRIES))
-		return -ENOSPC;
+	jump_to_uncached();
 
-	if (test_and_set_bit(pos, &pmb_map)) {
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
-		goto repeat;
-	}
+	/* Set V-bit */
+	__raw_writel(pmbe->vpn | PMB_V, addr);
+	__raw_writel(pmbe->ppn | pmbe->flags | PMB_V, data);
 
-	ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos));
+	back_to_cached();
+}
 
-#ifdef CONFIG_CACHE_WRITETHROUGH
-	/*
-	 * When we are in 32-bit address extended mode, CCR.CB becomes
-	 * invalid, so care must be taken to manually adjust cacheable
-	 * translations.
-	 */
-	if (likely(flags & PMB_C))
-		flags |= PMB_WT;
-#endif
+static void __clear_pmb_entry(struct pmb_entry *pmbe)
+{
+	unsigned long addr, data;
+	unsigned long addr_val, data_val;
 
-	ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos));
+	addr = mk_pmb_addr(pmbe->entry);
+	data = mk_pmb_data(pmbe->entry);
 
-	*entry = pos;
+	addr_val = __raw_readl(addr);
+	data_val = __raw_readl(data);
 
-	return 0;
+	/* Clear V-bit */
+	writel_uncached(addr_val & ~PMB_V, addr);
+	writel_uncached(data_val & ~PMB_V, data);
 }
 
-int __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
+static void set_pmb_entry(struct pmb_entry *pmbe)
 {
-	int ret;
+	unsigned long flags;
 
-	jump_to_uncached();
-	ret = __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &pmbe->entry);
-	back_to_cached();
+	spin_lock_irqsave(&pmbe->lock, flags);
+	__set_pmb_entry(pmbe);
+	spin_unlock_irqrestore(&pmbe->lock, flags);
+}
 
-	return ret;
+int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
+		     unsigned long size, pgprot_t prot)
+{
+	struct pmb_entry *pmbp, *pmbe;
+	unsigned long orig_addr, orig_size;
+	unsigned long flags, pmb_flags;
+	int i, mapped;
+
+	if (!pmb_addr_valid(vaddr, size))
+		return -EFAULT;
+	if (pmb_mapping_exists(vaddr, phys, size))
+		return 0;
+
+	orig_addr = vaddr;
+	orig_size = size;
+
+	flush_tlb_kernel_range(vaddr, vaddr + size);
+
+	pmb_flags = pgprot_to_pmb_flags(prot);
+	pmbp = NULL;
+
+	do {
+		for (i = mapped = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
+			if (size < pmb_sizes[i].size)
+				continue;
+
+			pmbe = pmb_alloc(vaddr, phys, pmb_flags |
+					 pmb_sizes[i].flag, PMB_NO_ENTRY);
+			if (IS_ERR(pmbe)) {
+				pmb_unmap_entry(pmbp, mapped);
+				return PTR_ERR(pmbe);
+			}
+
+			spin_lock_irqsave(&pmbe->lock, flags);
+
+			pmbe->size = pmb_sizes[i].size;
+
+			__set_pmb_entry(pmbe);
+
+			phys	+= pmbe->size;
+			vaddr	+= pmbe->size;
+			size	-= pmbe->size;
+
+			/*
+			 * Link adjacent entries that span multiple PMB
+			 * entries for easier tear-down.
+			 */
+			if (likely(pmbp)) {
+				spin_lock(&pmbp->lock);
+				pmbp->link = pmbe;
+				spin_unlock(&pmbp->lock);
+			}
+
+			pmbp = pmbe;
+
+			/*
+			 * Instead of trying smaller sizes on every
+			 * iteration (even if we succeed in allocating
+			 * space), try using pmb_sizes[i].size again.
+			 */
+			i--;
+			mapped++;
+
+			spin_unlock_irqrestore(&pmbe->lock, flags);
+		}
+	} while (size >= SZ_16M);
+
+	flush_cache_vmap(orig_addr, orig_addr + orig_size);
+
+	return 0;
 }
 
-void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
+void __iomem *pmb_remap_caller(phys_addr_t phys, unsigned long size,
+			       pgprot_t prot, void *caller)
 {
-	unsigned int entry = pmbe->entry;
-	unsigned long addr;
+	unsigned long vaddr;
+	phys_addr_t offset, last_addr;
+	phys_addr_t align_mask;
+	unsigned long aligned;
+	struct vm_struct *area;
+	int i, ret;
+
+	if (!pmb_iomapping_enabled)
+		return NULL;
 
 	/*
-	 * Don't allow clearing of wired init entries, P1 or P2 access
-	 * without a corresponding mapping in the PMB will lead to reset
-	 * by the TLB.
+	 * Small mappings need to go through the TLB.
 	 */
-	if (unlikely(entry < ARRAY_SIZE(pmb_init_map) ||
-		     entry >= NR_PMB_ENTRIES))
-		return;
+	if (size < SZ_16M)
+		return ERR_PTR(-EINVAL);
+	if (!pmb_prot_valid(prot))
+		return ERR_PTR(-EINVAL);
 
-	jump_to_uncached();
+	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+		if (size >= pmb_sizes[i].size)
+			break;
 
-	/* Clear V-bit */
-	addr = mk_pmb_addr(entry);
-	ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
+	last_addr = phys + size;
+	align_mask = ~(pmb_sizes[i].size - 1);
+	offset = phys & ~align_mask;
+	phys &= align_mask;
+	aligned = ALIGN(last_addr, pmb_sizes[i].size) - phys;
 
-	addr = mk_pmb_data(entry);
-	ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
+	/*
+	 * XXX: This should really start from uncached_end, but this
+	 * causes the MMU to reset, so for now we restrict it to the
+	 * 0xb000...0xc000 range.
+	 */
+	area = __get_vm_area_caller(aligned, VM_IOREMAP, 0xb0000000,
+				    P3SEG, caller);
+	if (!area)
+		return NULL;
 
-	back_to_cached();
+	area->phys_addr = phys;
+	vaddr = (unsigned long)area->addr;
+
+	ret = pmb_bolt_mapping(vaddr, phys, size, prot);
+	if (unlikely(ret != 0))
+		return ERR_PTR(ret);
 
-	clear_bit(entry, &pmb_map);
+	return (void __iomem *)(offset + (char *)vaddr);
 }
 
+int pmb_unmap(void __iomem *addr)
+{
+	struct pmb_entry *pmbe = NULL;
+	unsigned long vaddr = (unsigned long __force)addr;
+	int i, found = 0;
+
+	read_lock(&pmb_rwlock);
+
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		if (test_bit(i, pmb_map)) {
+			pmbe = &pmb_entry_list[i];
+			if (pmbe->vpn == vaddr) {
+				found = 1;
+				break;
+			}
+		}
+	}
 
-static struct {
-	unsigned long size;
-	int flag;
-} pmb_sizes[] = {
-	{ .size	= 0x20000000, .flag = PMB_SZ_512M, },
-	{ .size = 0x08000000, .flag = PMB_SZ_128M, },
-	{ .size = 0x04000000, .flag = PMB_SZ_64M,  },
-	{ .size = 0x01000000, .flag = PMB_SZ_16M,  },
-};
+	read_unlock(&pmb_rwlock);
+
+	if (found) {
+		pmb_unmap_entry(pmbe, NR_PMB_ENTRIES);
+		return 0;
+	}
+
+	return -EINVAL;
+}
 
-long pmb_remap(unsigned long vaddr, unsigned long phys,
-	       unsigned long size, unsigned long flags)
+static void __pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
 {
-	struct pmb_entry *pmbp;
-	unsigned long wanted;
-	int pmb_flags, i;
+	do {
+		struct pmb_entry *pmblink = pmbe;
 
-	/* Convert typical pgprot value to the PMB equivalent */
-	if (flags & _PAGE_CACHABLE) {
-		if (flags & _PAGE_WT)
-			pmb_flags = PMB_WT;
-		else
-			pmb_flags = PMB_C;
-	} else
-		pmb_flags = PMB_WT | PMB_UB;
+		/*
+		 * We may be called before this pmb_entry has been
+		 * entered into the PMB table via set_pmb_entry(), but
+		 * that's OK because we've allocated a unique slot for
+		 * this entry in pmb_alloc() (even if we haven't filled
+		 * it yet).
+		 *
+		 * Therefore, calling __clear_pmb_entry() is safe as no
+		 * other mapping can be using that slot.
+		 */
+		__clear_pmb_entry(pmbe);
 
-	pmbp = NULL;
-	wanted = size;
+		flush_cache_vunmap(pmbe->vpn, pmbe->vpn + pmbe->size);
+
+		pmbe = pmblink->link;
+
+		pmb_free(pmblink);
+	} while (pmbe && --depth);
+}
+
+static void pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
+{
+	unsigned long flags;
 
-again:
-	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
+	if (unlikely(!pmbe))
+		return;
+
+	write_lock_irqsave(&pmb_rwlock, flags);
+	__pmb_unmap_entry(pmbe, depth);
+	write_unlock_irqrestore(&pmb_rwlock, flags);
+}
+
+static void __init pmb_notify(void)
+{
+	int i;
+
+	pr_info("PMB: boot mappings:\n");
+
+	read_lock(&pmb_rwlock);
+
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
 		struct pmb_entry *pmbe;
-		int ret;
 
-		if (size < pmb_sizes[i].size)
+		if (!test_bit(i, pmb_map))
 			continue;
 
-		pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag);
-		if (IS_ERR(pmbe))
-			return PTR_ERR(pmbe);
+		pmbe = &pmb_entry_list[i];
 
-		ret = set_pmb_entry(pmbe);
-		if (ret != 0) {
-			pmb_free(pmbe);
-			return -EBUSY;
-		}
+		pr_info("       0x%08lx -> 0x%08lx [ %4ldMB %2scached ]\n",
+			pmbe->vpn >> PAGE_SHIFT, pmbe->ppn >> PAGE_SHIFT,
+			pmbe->size >> 20, (pmbe->flags & PMB_C) ? "" : "un");
+	}
 
-		phys	+= pmb_sizes[i].size;
-		vaddr	+= pmb_sizes[i].size;
-		size	-= pmb_sizes[i].size;
+	read_unlock(&pmb_rwlock);
+}
+
+/*
+ * Sync our software copy of the PMB mappings with those in hardware. The
+ * mappings in the hardware PMB were either set up by the bootloader or
+ * very early on by the kernel.
+ */
+static void __init pmb_synchronize(void)
+{
+	struct pmb_entry *pmbp = NULL;
+	int i, j;
+
+	/*
+	 * Run through the initial boot mappings, log the established
+	 * ones, and blow away anything that falls outside of the valid
+	 * PPN range. Specifically, we only care about existing mappings
+	 * that impact the cached/uncached sections.
+	 *
+	 * Note that touching these can be a bit of a minefield; the boot
+	 * loader can establish multi-page mappings with the same caching
+	 * attributes, so we need to ensure that we aren't modifying a
+	 * mapping that we're presently executing from, or may execute
+	 * from in the case of straddling page boundaries.
+	 *
+	 * In the future we will have to tidy up after the boot loader by
+	 * jumping between the cached and uncached mappings and tearing
+	 * down alternating mappings while executing from the other.
+	 */
+	for (i = 0; i < NR_PMB_ENTRIES; i++) {
+		unsigned long addr, data;
+		unsigned long addr_val, data_val;
+		unsigned long ppn, vpn, flags;
+		unsigned long irqflags;
+		unsigned int size;
+		struct pmb_entry *pmbe;
+
+		addr = mk_pmb_addr(i);
+		data = mk_pmb_data(i);
+
+		addr_val = __raw_readl(addr);
+		data_val = __raw_readl(data);
 
 		/*
-		 * Link adjacent entries that span multiple PMB entries
-		 * for easier tear-down.
+		 * Skip over any bogus entries
 		 */
-		if (likely(pmbp))
-			pmbp->link = pmbe;
+		if (!(data_val & PMB_V) || !(addr_val & PMB_V))
+			continue;
 
-		pmbp = pmbe;
-	}
+		ppn = data_val & PMB_PFN_MASK;
+		vpn = addr_val & PMB_PFN_MASK;
+
+		/*
+		 * Only preserve in-range mappings.
+		 */
+		if (!pmb_ppn_in_range(ppn)) {
+			/*
+			 * Invalidate anything out of bounds.
+			 */
+			writel_uncached(addr_val & ~PMB_V, addr);
+			writel_uncached(data_val & ~PMB_V, data);
+			continue;
+		}
+
+		/*
+		 * Update the caching attributes if necessary
+		 */
+		if (data_val & PMB_C) {
+			data_val &= ~PMB_CACHE_MASK;
+			data_val |= pmb_cache_flags();
+
+			writel_uncached(data_val, data);
+		}
+
+		size = data_val & PMB_SZ_MASK;
+		flags = size | (data_val & PMB_CACHE_MASK);
 
-	if (size >= 0x1000000)
-		goto again;
+		pmbe = pmb_alloc(vpn, ppn, flags, i);
+		if (IS_ERR(pmbe)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		spin_lock_irqsave(&pmbe->lock, irqflags);
+
+		for (j = 0; j < ARRAY_SIZE(pmb_sizes); j++)
+			if (pmb_sizes[j].flag == size)
+				pmbe->size = pmb_sizes[j].size;
+
+		if (pmbp) {
+			spin_lock(&pmbp->lock);
 
-	return wanted - size;
+			/*
+			 * Compare the previous entry against the current one to
+			 * see if the entries span a contiguous mapping. If so,
+			 * setup the entry links accordingly. Compound mappings
+			 * are later coalesced.
+			 */
+			if (pmb_can_merge(pmbp, pmbe))
+				pmbp->link = pmbe;
+
+			spin_unlock(&pmbp->lock);
+		}
+
+		pmbp = pmbe;
+
+		spin_unlock_irqrestore(&pmbe->lock, irqflags);
+	}
 }
 
-void pmb_unmap(unsigned long addr)
+static void __init pmb_merge(struct pmb_entry *head)
 {
-	struct pmb_entry **p, *pmbe;
+	unsigned long span, newsize;
+	struct pmb_entry *tail;
+	int i = 1, depth = 0;
+
+	span = newsize = head->size;
+
+	tail = head->link;
+	while (tail) {
+		span += tail->size;
 
-	for (p = &pmb_list; (pmbe = *p); p = &pmbe->next)
-		if (pmbe->vpn == addr)
+		if (pmb_size_valid(span)) {
+			newsize = span;
+			depth = i;
+		}
+
+		/* This is the end of the line.. */
+		if (!tail->link)
 			break;
 
-	if (unlikely(!pmbe))
-		return;
+		tail = tail->link;
+		i++;
+	}
 
-	WARN_ON(!test_bit(pmbe->entry, &pmb_map));
+	/*
+	 * The merged page size must be valid.
+	 */
+	if (!pmb_size_valid(newsize))
+		return;
 
-	do {
-		struct pmb_entry *pmblink = pmbe;
+	head->flags &= ~PMB_SZ_MASK;
+	head->flags |= pmb_size_to_flags(newsize);
 
-		clear_pmb_entry(pmbe);
-		pmbe = pmblink->link;
+	head->size = newsize;
 
-		pmb_free(pmblink);
-	} while (pmbe);
+	__pmb_unmap_entry(head->link, depth);
+	__set_pmb_entry(head);
 }
 
-static void pmb_cache_ctor(void *pmb)
+static void __init pmb_coalesce(void)
 {
-	struct pmb_entry *pmbe = pmb;
+	unsigned long flags;
+	int i;
 
-	memset(pmb, 0, sizeof(struct pmb_entry));
+	write_lock_irqsave(&pmb_rwlock, flags);
 
-	pmbe->entry = PMB_NO_ENTRY;
-}
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		struct pmb_entry *pmbe;
 
-static int __uses_jump_to_uncached pmb_init(void)
-{
-	unsigned int nr_entries = ARRAY_SIZE(pmb_init_map);
-	unsigned int entry, i;
+		if (!test_bit(i, pmb_map))
+			continue;
 
-	BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES));
+		pmbe = &pmb_entry_list[i];
 
-	pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0,
-				      SLAB_PANIC, pmb_cache_ctor);
+		/*
+		 * We're only interested in compound mappings
+		 */
+		if (!pmbe->link)
+			continue;
 
-	jump_to_uncached();
+		/*
+		 * Nothing to do if it already uses the largest possible
+		 * page size.
+		 */
+		if (pmbe->size == SZ_512M)
+			continue;
+
+		pmb_merge(pmbe);
+	}
+
+	write_unlock_irqrestore(&pmb_rwlock, flags);
+}
+
+#ifdef CONFIG_UNCACHED_MAPPING
+static void __init pmb_resize(void)
+{
+	int i;
 
 	/*
-	 * Ordering is important, P2 must be mapped in the PMB before we
-	 * can set PMB.SE, and P1 must be mapped before we jump back to
-	 * P1 space.
+	 * If the uncached mapping was constructed by the kernel, it will
+	 * already be a reasonable size.
 	 */
-	for (entry = 0; entry < nr_entries; entry++) {
-		struct pmb_entry *pmbe = pmb_init_map + entry;
+	if (uncached_size == SZ_16M)
+		return;
 
-		__set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &entry);
-	}
+	read_lock(&pmb_rwlock);
 
-	ctrl_outl(0, PMB_IRMCR);
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		struct pmb_entry *pmbe;
+		unsigned long flags;
 
-	/* PMB.SE and UB[7] */
-	ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR);
+		if (!test_bit(i, pmb_map))
+			continue;
 
-	/* Flush out the TLB */
-	i =  ctrl_inl(MMUCR);
-	i |= MMUCR_TI;
-	ctrl_outl(i, MMUCR);
+		pmbe = &pmb_entry_list[i];
 
-	back_to_cached();
+		if (pmbe->vpn != uncached_start)
+			continue;
+
+		/*
+		 * Found it, now resize it.
+		 */
+		spin_lock_irqsave(&pmbe->lock, flags);
+
+		pmbe->size = SZ_16M;
+		pmbe->flags &= ~PMB_SZ_MASK;
+		pmbe->flags |= pmb_size_to_flags(pmbe->size);
+
+		uncached_resize(pmbe->size);
+
+		__set_pmb_entry(pmbe);
+
+		spin_unlock_irqrestore(&pmbe->lock, flags);
+	}
+
+	read_lock(&pmb_rwlock);
+}
+#endif
+
+static int __init early_pmb(char *p)
+{
+	if (!p)
+		return 0;
+
+	if (strstr(p, "iomap"))
+		pmb_iomapping_enabled = 1;
 
 	return 0;
 }
-arch_initcall(pmb_init);
+early_param("pmb", early_pmb);
+
+void __init pmb_init(void)
+{
+	/* Synchronize software state */
+	pmb_synchronize();
+
+	/* Attempt to combine compound mappings */
+	pmb_coalesce();
+
+#ifdef CONFIG_UNCACHED_MAPPING
+	/* Resize initial mappings, if necessary */
+	pmb_resize();
+#endif
+
+	/* Log them */
+	pmb_notify();
+
+	writel_uncached(0, PMB_IRMCR);
+
+	/* Flush out the TLB */
+	__raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR);
+	ctrl_barrier();
+}
+
+bool __in_29bit_mode(void)
+{
+        return (__raw_readl(PMB_PASCR) & PASCR_SE) == 0;
+}
 
 static int pmb_seq_show(struct seq_file *file, void *iter)
 {
@@ -356,8 +824,8 @@ static int pmb_seq_show(struct seq_file *file, void *iter)
 		unsigned int size;
 		char *sz_str = NULL;
 
-		addr = ctrl_inl(mk_pmb_addr(i));
-		data = ctrl_inl(mk_pmb_data(i));
+		addr = __raw_readl(mk_pmb_addr(i));
+		data = __raw_readl(mk_pmb_data(i));
 
 		size = data & PMB_SZ_MASK;
 		sz_str = (size == PMB_SZ_16M)  ? " 16MB":
@@ -403,23 +871,33 @@ static int __init pmb_debugfs_init(void)
 
 	return 0;
 }
-postcore_initcall(pmb_debugfs_init);
+subsys_initcall(pmb_debugfs_init);
 
 #ifdef CONFIG_PM
 static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 {
 	static pm_message_t prev_state;
+	int i;
 
 	/* Restore the PMB after a resume from hibernation */
 	if (state.event == PM_EVENT_ON &&
 	    prev_state.event == PM_EVENT_FREEZE) {
 		struct pmb_entry *pmbe;
-		spin_lock_irq(&pmb_list_lock);
-		for (pmbe = pmb_list; pmbe; pmbe = pmbe->next)
-			set_pmb_entry(pmbe);
-		spin_unlock_irq(&pmb_list_lock);
+
+		read_lock(&pmb_rwlock);
+
+		for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+			if (test_bit(i, pmb_map)) {
+				pmbe = &pmb_entry_list[i];
+				set_pmb_entry(pmbe);
+			}
+		}
+
+		read_unlock(&pmb_rwlock);
 	}
+
 	prev_state = state;
+
 	return 0;
 }
 
@@ -437,6 +915,5 @@ static int __init pmb_sysdev_init(void)
 {
 	return sysdev_driver_register(&cpu_sysdev_class, &pmb_sysdev_driver);
 }
-
 subsys_initcall(pmb_sysdev_init);
 #endif
diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c
index 409b7c2b4b9d..32dc674c550c 100644
--- a/arch/sh/mm/tlb-pteaex.c
+++ b/arch/sh/mm/tlb-pteaex.c
@@ -68,8 +68,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
  * in extended mode, the legacy 8-bit ASID field in address array 1 has
  * undefined behaviour.
  */
-void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
-						 unsigned long page)
+void local_flush_tlb_one(unsigned long asid, unsigned long page)
 {
 	jump_to_uncached();
 	__raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index ace8e6d2f59d..4f5f7cbdd508 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -41,14 +41,14 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 
 	/* Set PTEH register */
 	vpn = (address & MMU_VPN_MASK) | get_asid();
-	ctrl_outl(vpn, MMU_PTEH);
+	__raw_writel(vpn, MMU_PTEH);
 
 	pteval = pte_val(pte);
 
 	/* Set PTEL register */
 	pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
 	/* conveniently, we want all the software flags to be 0 anyway */
-	ctrl_outl(pteval, MMU_PTEL);
+	__raw_writel(pteval, MMU_PTEL);
 
 	/* Load the TLB */
 	asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
@@ -75,5 +75,5 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
 	}
 
 	for (i = 0; i < ways; i++)
-		ctrl_outl(data, addr + (i << 8));
+		__raw_writel(data, addr + (i << 8));
 }
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
index 8cf550e2570f..ccac77f504a8 100644
--- a/arch/sh/mm/tlb-sh4.c
+++ b/arch/sh/mm/tlb-sh4.c
@@ -29,7 +29,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 
 	/* Set PTEH register */
 	vpn = (address & MMU_VPN_MASK) | get_asid();
-	ctrl_outl(vpn, MMU_PTEH);
+	__raw_writel(vpn, MMU_PTEH);
 
 	pteval = pte.pte_low;
 
@@ -41,13 +41,13 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 	 * the protection bits (with the exception of the compat-mode SZ
 	 * and PR bits, which are cleared) being written out in PTEL.
 	 */
-	ctrl_outl(pte.pte_high, MMU_PTEA);
+	__raw_writel(pte.pte_high, MMU_PTEA);
 #else
 	if (cpu_data->flags & CPU_HAS_PTEA) {
 		/* The last 3 bits and the first one of pteval contains
 		 * the PTEA timing control and space attribute bits
 		 */
-		ctrl_outl(copy_ptea_attributes(pteval), MMU_PTEA);
+		__raw_writel(copy_ptea_attributes(pteval), MMU_PTEA);
 	}
 #endif
 
@@ -57,15 +57,14 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 	pteval |= _PAGE_WT;
 #endif
 	/* conveniently, we want all the software flags to be 0 anyway */
-	ctrl_outl(pteval, MMU_PTEL);
+	__raw_writel(pteval, MMU_PTEL);
 
 	/* Load the TLB */
 	asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
 	local_irq_restore(flags);
 }
 
-void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
-						 unsigned long page)
+void local_flush_tlb_one(unsigned long asid, unsigned long page)
 {
 	unsigned long addr, data;
 
@@ -78,6 +77,6 @@ void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
 	addr = MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT;
 	data = page | asid; /* VALID bit is off */
 	jump_to_uncached();
-	ctrl_outl(data, addr);
+	__raw_writel(data, addr);
 	back_to_cached();
 }
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
index fdb64e41ec50..f27dbe1c1599 100644
--- a/arch/sh/mm/tlb-sh5.c
+++ b/arch/sh/mm/tlb-sh5.c
@@ -143,3 +143,42 @@ void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
  */
 void sh64_teardown_tlb_slot(unsigned long long config_addr)
 	__attribute__ ((alias("__flush_tlb_slot")));
+
+static int dtlb_entry;
+static unsigned long long dtlb_entries[64];
+
+void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	unsigned long long entry;
+	unsigned long paddr, flags;
+
+	BUG_ON(dtlb_entry == ARRAY_SIZE(dtlb_entries));
+
+	local_irq_save(flags);
+
+	entry = sh64_get_wired_dtlb_entry();
+	dtlb_entries[dtlb_entry++] = entry;
+
+	paddr = pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK;
+	paddr &= ~PAGE_MASK;
+
+	sh64_setup_tlb_slot(entry, addr, get_asid(), paddr);
+
+	local_irq_restore(flags);
+}
+
+void tlb_unwire_entry(void)
+{
+	unsigned long long entry;
+	unsigned long flags;
+
+	BUG_ON(!dtlb_entry);
+
+	local_irq_save(flags);
+	entry = dtlb_entries[dtlb_entry--];
+
+	sh64_teardown_tlb_slot(entry);
+	sh64_put_wired_dtlb_entry(entry);
+
+	local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-urb.c b/arch/sh/mm/tlb-urb.c
new file mode 100644
index 000000000000..bb5b9098956d
--- /dev/null
+++ b/arch/sh/mm/tlb-urb.c
@@ -0,0 +1,81 @@
+/*
+ * arch/sh/mm/tlb-urb.c
+ *
+ * TLB entry wiring helpers for URB-equipped parts.
+ *
+ * Copyright (C) 2010  Matt Fleming
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Load the entry for 'addr' into the TLB and wire the entry.
+ */
+void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	unsigned long status, flags;
+	int urb;
+
+	local_irq_save(flags);
+
+	/* Load the entry into the TLB */
+	__update_tlb(vma, addr, pte);
+
+	/* ... and wire it up. */
+	status = __raw_readl(MMUCR);
+	urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT;
+	status &= ~MMUCR_URB;
+
+	/*
+	 * Make sure we're not trying to wire the last TLB entry slot.
+	 */
+	BUG_ON(!--urb);
+
+	urb = urb % MMUCR_URB_NENTRIES;
+
+	status |= (urb << MMUCR_URB_SHIFT);
+	__raw_writel(status, MMUCR);
+	ctrl_barrier();
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Unwire the last wired TLB entry.
+ *
+ * It should also be noted that it is not possible to wire and unwire
+ * TLB entries in an arbitrary order. If you wire TLB entry N, followed
+ * by entry N+1, you must unwire entry N+1 first, then entry N. In this
+ * respect, it works like a stack or LIFO queue.
+ */
+void tlb_unwire_entry(void)
+{
+	unsigned long status, flags;
+	int urb;
+
+	local_irq_save(flags);
+
+	status = __raw_readl(MMUCR);
+	urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT;
+	status &= ~MMUCR_URB;
+
+	/*
+	 * Make sure we're not trying to unwire a TLB entry when none
+	 * have been wired.
+	 */
+	BUG_ON(urb++ == MMUCR_URB_NENTRIES);
+
+	urb = urb % MMUCR_URB_NENTRIES;
+
+	status |= (urb << MMUCR_URB_SHIFT);
+	__raw_writel(status, MMUCR);
+	ctrl_barrier();
+
+	local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlbflush_32.c b/arch/sh/mm/tlbflush_32.c
index 6f45c1f8a7fe..004bb3f25b5f 100644
--- a/arch/sh/mm/tlbflush_32.c
+++ b/arch/sh/mm/tlbflush_32.c
@@ -132,9 +132,9 @@ void local_flush_tlb_all(void)
 	 *      It's same position, bit #2.
 	 */
 	local_irq_save(flags);
-	status = ctrl_inl(MMUCR);
+	status = __raw_readl(MMUCR);
 	status |= 0x04;
-	ctrl_outl(status, MMUCR);
+	__raw_writel(status, MMUCR);
 	ctrl_barrier();
 	local_irq_restore(flags);
 }
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index de0b0e881823..706da1d3a67a 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -36,7 +36,7 @@ extern void die(const char *,struct pt_regs *,long);
 
 static inline void print_prots(pgprot_t prot)
 {
-	printk("prot is 0x%08lx\n",pgprot_val(prot));
+	printk("prot is 0x%016llx\n",pgprot_val(prot));
 
 	printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
 	       PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c
new file mode 100644
index 000000000000..cf20a5c5136a
--- /dev/null
+++ b/arch/sh/mm/uncached.c
@@ -0,0 +1,34 @@
+#include <linux/init.h>
+#include <asm/sizes.h>
+#include <asm/page.h>
+
+/*
+ * This is the offset of the uncached section from its cached alias.
+ *
+ * Legacy platforms handle trivial transitions between cached and
+ * uncached segments by making use of the 1:1 mapping relationship in
+ * 512MB lowmem, others via a special uncached mapping.
+ *
+ * Default value only valid in 29 bit mode, in 32bit mode this will be
+ * updated by the early PMB initialization code.
+ */
+unsigned long cached_to_uncached = SZ_512M;
+unsigned long uncached_size = SZ_512M;
+unsigned long uncached_start, uncached_end;
+
+int virt_addr_uncached(unsigned long kaddr)
+{
+	return (kaddr >= uncached_start) && (kaddr < uncached_end);
+}
+
+void __init uncached_init(void)
+{
+	uncached_start = memory_end;
+	uncached_end = uncached_start + uncached_size;
+}
+
+void __init uncached_resize(unsigned long size)
+{
+	uncached_size = size;
+	uncached_end = uncached_start + uncached_size;
+}