diff options
Diffstat (limited to 'arch/powerpc/kernel')
67 files changed, 3124 insertions, 1822 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b23664a0b86c..877326320e74 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,10 +42,11 @@ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o -procfs-$(CONFIG_PPC64) := proc_ppc64.o +procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) +obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_LPARCFG) += lparcfg.o @@ -97,11 +98,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a5b632e52fae..b876e989220b 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -642,10 +642,14 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, */ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, unsigned int areg, struct pt_regs *regs, - unsigned int flags, unsigned int length) + unsigned int flags, unsigned int length, + unsigned int elsize) { char *ptr; + unsigned long *lptr; int ret = 0; + int sw = 0; + int i, j; flush_vsx_to_thread(current); @@ -654,19 +658,35 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, else ptr = (char *) ¤t->thread.vr[reg - 32]; - if (flags & ST) - ret = __copy_to_user(addr, ptr, length); - else { - if (flags & SPLT){ - ret = __copy_from_user(ptr, addr, length); - ptr += length; + lptr = (unsigned long *) ptr; + + if (flags & SW) + sw = elsize-1; + + for (j = 0; j < length; j += elsize) { + for (i = 0; i < elsize; ++i) { + if (flags & ST) + ret |= __put_user(ptr[i^sw], addr + i); + else + ret |= __get_user(ptr[i^sw], addr + i); } - ret |= __copy_from_user(ptr, addr, length); + ptr += elsize; + addr += elsize; } - if (flags & U) - regs->gpr[areg] = regs->dar; - if (ret) + + if (!ret) { + if (flags & U) + regs->gpr[areg] = regs->dar; + + /* Splat load copies the same data to top and bottom 8 bytes */ + if (flags & SPLT) + lptr[1] = lptr[0]; + /* For 8 byte loads, zero the top 8 bytes */ + else if (!(flags & ST) && (8 == length)) + lptr[1] = 0; + } else return -EFAULT; + return 1; } #endif @@ -732,7 +752,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { - PPC_WARN_EMULATED(spe); + PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif @@ -767,16 +787,25 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_VSX if ((instruction & 0xfc00003e) == 0x7c000018) { - /* Additional register addressing bit (64 VSX vs 32 FPR/GPR */ + unsigned int elsize; + + /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */ reg |= (instruction & 0x1) << 5; /* Simple inline decoder instead of a table */ + /* VSX has only 8 and 16 byte memory accesses */ + nb = 8; if (instruction & 0x200) nb = 16; - else if (instruction & 0x080) - nb = 8; - else - nb = 4; + + /* Vector stores in little-endian mode swap individual + elements, so process them separately */ + elsize = 4; + if (instruction & 0x80) + elsize = 8; + flags = 0; + if (regs->msr & MSR_LE) + flags |= SW; if (instruction & 0x100) flags |= ST; if (instruction & 0x040) @@ -786,15 +815,15 @@ int fix_alignment(struct pt_regs *regs) flags |= SPLT; nb = 8; } - PPC_WARN_EMULATED(vsx); - return emulate_vsx(addr, reg, areg, regs, flags, nb); + PPC_WARN_ALIGNMENT(vsx, regs); + return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); } #endif /* A size of 0 indicates an instruction we don't support, with * the exception of DCBZ which is handled as a special case here */ if (instr == DCBZ) { - PPC_WARN_EMULATED(dcbz); + PPC_WARN_ALIGNMENT(dcbz, regs); return emulate_dcbz(regs, addr); } if (unlikely(nb == 0)) @@ -804,7 +833,7 @@ int fix_alignment(struct pt_regs *regs) * function */ if (flags & M) { - PPC_WARN_EMULATED(multiple); + PPC_WARN_ALIGNMENT(multiple, regs); return emulate_multiple(regs, addr, reg, nb, flags, instr, swiz); } @@ -825,11 +854,11 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) { - PPC_WARN_EMULATED(fp_pair); + PPC_WARN_ALIGNMENT(fp_pair, regs); return emulate_fp_pair(addr, reg, flags); } - PPC_WARN_EMULATED(unaligned); + PPC_WARN_ALIGNMENT(unaligned, regs); /* If we are loading, get the data from user space, else * get it from register values diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0812b0f414bb..957ceb7059c5 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -190,6 +190,35 @@ int main(void) DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); + DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); + DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); + DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); + DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); + DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); + DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); + DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); + DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); + DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); + DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); + DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); + DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); + DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); + DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); + DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); + DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); + DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); + DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); + DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); + DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); + DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, + shadow_vcpu.vmhandler)); + DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, + shadow_vcpu.scratch0)); + DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, + shadow_vcpu.scratch1)); +#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -384,8 +413,6 @@ int main(void) DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); @@ -398,14 +425,29 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + + /* book3s_64 */ +#ifdef CONFIG_PPC64 + DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); + DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); + DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); + DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); + DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); + DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); + DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); + DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); + DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); +#else + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); +#endif /* CONFIG_PPC64 */ #endif #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_FSL_BOOKE - DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); -#endif #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d19a58..01fe9ce28379 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -642,7 +642,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 0b9c9135922e..8af4949434b2 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -697,9 +697,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", }, - { /* 750CL */ - .pvr_mask = 0xfffff0f0, - .pvr_value = 0x00087010, + { /* 750CL (and "Broadway") */ + .pvr_mask = 0xfffff0e0, + .pvr_value = 0x00087000, .cpu_name = "750CL", .cpu_features = CPU_FTRS_750CL, .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, @@ -711,6 +711,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -1806,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ + .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 0a8439aafdd1..6f4613dd05ef 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -373,7 +373,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) hard_irq_disable(); for_each_irq(i) { - struct irq_desc *desc = irq_desc + i; + struct irq_desc *desc = irq_to_desc(i); if (desc->status & IRQ_INPROGRESS) desc->chip->eoi(i); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e96cbbd9b449..59c928564a03 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -21,7 +21,6 @@ #include <asm/dma.h> #include <asm/abs_addr.h> -int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; /* diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 000000000000..7c07de0d8943 --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c @@ -0,0 +1,129 @@ +/* + * Performance counter support for e500 family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_event.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +/* + * Map of generic hardware event types to hardware events + * Zero if unsupported + */ +static int e500_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, + [PERF_COUNT_HW_BRANCH_MISSES] = 15, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + /* + * D-cache misses are not split into read/write/prefetch; + * use raw event 41. + */ + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 27, 0 }, + [C(OP_WRITE)] = { 28, 0 }, + [C(OP_PREFETCH)] = { 29, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 2, 60 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * Assuming LL means L2, it's not a good match for this model. + * It allocates only on L1 castout or explicit prefetch, and + * does not have separate read/write events (but it does have + * separate instruction/data events). + */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * There are data/instruction MMU misses, but that's a miss on + * the chip's internal level-one TLB which is probably not + * what the user wants. Instead, unified level-two TLB misses + * are reported here. + */ + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 26, 66 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 12, 15 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +static int num_events = 128; + +/* Upper half of event id is PMLCb, for threshold events */ +static u64 e500_xlate_event(u64 event_id) +{ + u32 event_low = (u32)event_id; + u64 ret; + + if (event_low >= num_events) + return 0; + + ret = FSL_EMB_EVENT_VALID; + + if (event_low >= 76 && event_low <= 81) { + ret |= FSL_EMB_EVENT_RESTRICTED; + ret |= event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); + } else if (event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { + /* Threshold requested on non-threshold event */ + return 0; + } + + return ret; +} + +static struct fsl_emb_pmu e500_pmu = { + .name = "e500 family", + .n_counter = 4, + .n_restricted = 2, + .xlate_event = e500_xlate_event, + .n_generic = ARRAY_SIZE(e500_generic_events), + .generic_events = e500_generic_events, + .cache_events = &e500_cache_events, +}; + +static int init_e500_pmu(void) +{ + if (!cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + num_events = 256; + else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + return -ENODEV; + + return register_fsl_emb_pmu(&e500_pmu); +} + +arch_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 900e0eea0099..07109d843787 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -551,7 +551,7 @@ restore: BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) FW_FTR_SECTION_ELSE - b iseries_check_pending_irqs + b .Liseries_check_pending_irqs ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); @@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_BOOK3E */ -iseries_check_pending_irqs: +.Liseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) cmpdi 0,r5,0 @@ -658,42 +658,43 @@ do_work: cmpdi r0,0 crandc eq,cr1*4+eq,eq bne restore - /* here we are preempting the current task */ -1: -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_on - /* Note: we just clobbered r10 which used to contain the previous - * MSR before the hard-disabling done by the caller of do_work. - * We don't have that value anymore, but it doesn't matter as - * we will hard-enable unconditionally, we can just reload the - * current MSR into r10 + + /* Here we are preempting the current task. + * + * Ensure interrupts are soft-disabled. We also properly mark + * the PACA to reflect the fact that they are hard-disabled + * and trace the change */ - mfmsr r10 -#endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 + li r0,0 stb r0,PACASOFTIRQEN(r13) stb r0,PACAHARDIRQEN(r13) + TRACE_DISABLE_INTS + + /* Call the scheduler with soft IRQs off */ +1: bl .preempt_schedule_irq + + /* Hard-disable interrupts again (and update PACA) */ #ifdef CONFIG_PPC_BOOK3E - wrteei 1 - bl .preempt_schedule wrteei 0 #else - ori r10,r10,MSR_EE - mtmsrd r10,1 /* reenable interrupts */ - bl .preempt_schedule mfmsr r10 - clrrdi r9,r1,THREAD_SHIFT - rldicl r10,r10,48,1 /* disable interrupts again */ + rldicl r10,r10,48,1 rotldi r10,r10,16 mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ + li r0,0 + stb r0,PACAHARDIRQEN(r13) + + /* Re-test flags and eventually loop */ + clrrdi r9,r1,THREAD_SHIFT ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_NEED_RESCHED bne 1b b restore user_work: -#endif +#endif /* CONFIG_PREEMPT */ + /* Enable interrupts */ #ifdef CONFIG_PPC_BOOK3E wrteei 1 @@ -790,9 +791,8 @@ _GLOBAL(enter_rtas) li r9,1 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI andc r6,r0,r9 - ori r6,r6,MSR_RI sync /* disable interrupts so SRR0/1 */ mtmsrd r0 /* don't get trashed */ @@ -1038,8 +1038,7 @@ _GLOBAL(mod_return_to_handler) * We are in a module using the module's TOC. * Switch to our TOC to run inside the core kernel. */ - LOAD_REG_IMMEDIATE(r4,ftrace_return_to_handler) - ld r2, 8(r4) + ld r2, PACATOC(r13) bl .ftrace_return_to_handler nop diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876edcc9..e3be98ffe2a7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -41,6 +41,7 @@ __start_interrupts: . = 0x200 _machine_check_pSeries: HMT_MEDIUM + DO_KVM 0x200 mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) @@ -48,6 +49,7 @@ _machine_check_pSeries: .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM + DO_KVM 0x300 mtspr SPRN_SPRG_SCRATCH0,r13 BEGIN_FTR_SECTION mfspr r13,SPRN_SPRG_PACA @@ -77,6 +79,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM + DO_KVM 0x380 mtspr SPRN_SPRG_SCRATCH0,r13 mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) @@ -115,6 +118,7 @@ data_access_slb_pSeries: .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM + DO_KVM 0x480 mtspr SPRN_SPRG_SCRATCH0,r13 mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) @@ -154,6 +158,7 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM + DO_KVM 0xc00 BEGIN_FTR_SECTION cmpdi r0,0x1ebe beq- 1f @@ -185,13 +190,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ +performance_monitor_pSeries_1: . = 0xf00 + DO_KVM 0xf00 b performance_monitor_pSeries +altivec_unavailable_pSeries_1: . = 0xf20 + DO_KVM 0xf20 b altivec_unavailable_pSeries +vsx_unavailable_pSeries_1: . = 0xf40 + DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 1679a70bbcad..6b1f4271eb53 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -17,5 +17,5 @@ #include <asm/firmware.h> -unsigned long powerpc_firmware_features; +unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 829c3fe7c5a2..e025e89fe93e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -164,6 +164,9 @@ __after_mmu_off: #ifdef CONFIG_PPC_EARLY_DEBUG_CPM bl setup_cpm_bat #endif +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + bl setup_usbgecko_bat +#endif /* * Call setup_cpu for CPU 0 and initialize 6xx Idle @@ -1203,6 +1206,28 @@ setup_cpm_bat: blr #endif +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO +setup_usbgecko_bat: + /* prepare a BAT for early io */ +#if defined(CONFIG_GAMECUBE) + lis r8, 0x0c00 +#elif defined(CONFIG_WII) + lis r8, 0x0d00 +#else +#error Invalid platform for USB Gecko based early debugging. +#endif + /* + * The virtual address used must match the virtual address + * associated to the fixmap entry FIX_EARLY_DEBUG_BASE. + */ + lis r11, 0xfffe /* top 128K */ + ori r8, r8, 0x002a /* uncached, guarded ,rw */ + ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */ + mtspr SPRN_DBAT1L, r8 + mtspr SPRN_DBAT1U, r11 + blr +#endif + #ifdef CONFIG_8260 /* Jump into the system reset for the rom. * We first disable the MMU, and then jump to the ROM reset address. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c38afdb45d7b..bed9a29ee383 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -37,6 +37,7 @@ #include <asm/firmware.h> #include <asm/page_64.h> #include <asm/irqflags.h> +#include <asm/kvm_book3s_64_asm.h> /* The physical memory is layed out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -165,6 +166,12 @@ exception_marker: #include "exceptions-64s.S" #endif +/* KVM trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_64_rmhandlers.S" +#endif + _GLOBAL(generic_secondary_thread_init) mr r24,r3 @@ -212,7 +219,8 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ + ld r13,0(r13) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -529,7 +537,8 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ @@ -608,6 +617,17 @@ _GLOBAL(start_secondary_prolog) std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary b . +/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . #endif /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6ded19d01891..3ef743fa5d7c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -206,6 +206,8 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -222,6 +224,8 @@ DataAccess: stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ EXC_XFER_EE_LITE(0x300, handle_page_fault) /* Instruction access exception. @@ -244,6 +248,8 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -333,26 +339,20 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) -4: -#else - ori r10, r10, _PAGE_ACCESSED - stw r10, 0(r11) -#endif + andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT + cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + bne- cr0, 2f + + /* Clear PP lsb, 0x400 */ + rlwinm r10, r10, 0, 22, 20 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 + li r11, 0x00f0 rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -365,6 +365,22 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif rfi +2: + mfspr r11, SPRN_SRR1 + /* clear all error bits as TLB Miss + * sets a few unconditionally + */ + rlwinm r11, r11, 0, 0xffff + mtspr SPRN_SRR1, r11 + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + b InstructionAccess . = 0x1200 DataStoreTLBMiss: @@ -406,29 +422,45 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, 27, 27 + /* Insert the WriteThru flag into the TWC from the Linux PTE. + * It is bit 25 in the Linux PTE and bit 30 in the TWC + */ + rlwimi r11, r10, 32-5, 30, 30 DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: - /* and update pte in table */ -#else - ori r10, r10, _PAGE_ACCESSED -#endif - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) + /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. + * We also need to know if the insn is a load/store, so: + * Clear _PAGE_PRESENT and load that which will + * trap into DTLB Error with store bit set accordinly. + */ + /* PRESENT=0x1, ACCESSED=0x20 + * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); + * r10 = (r10 & ~PRESENT) | r11; + */ + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT + + /* Honour kernel RO, User NA */ + /* 0x200 == Extended encoding, bit 22 */ + /* r11 = (r10 & _PAGE_USER) >> 2 */ + rlwinm r11, r10, 32-2, 0x200 + or r10, r11, r10 + /* r11 = (r10 & _PAGE_RW) >> 1 */ + rlwinm r11, r10, 32-1, 0x200 + or r10, r11, r10 + /* invert RW and 0x200 bits */ + xori r10, r10, _PAGE_RW | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ 2: li r11, 0x00f0 + mtspr SPRN_DAR,r11 /* Tag DAR */ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -469,97 +501,10 @@ DataTLBError: stw r10, 0(r0) stw r11, 4(r0) - /* First, make sure this was a store operation. - */ - mfspr r10, SPRN_DSISR - andis. r11, r10, 0x0200 /* If set, indicates store op */ - beq 2f - - /* The EA of a data TLB miss is automatically stored in the MD_EPN - * register. The EA of a data TLB error is automatically stored in - * the DAR, but not the MD_EPN register. We must copy the 20 most - * significant bits of the EA from the DAR to MD_EPN before we - * start walking the page tables. We also need to copy the CASID - * value from the M_CASID register. - * Addendum: The EA of a data TLB error is _supposed_ to be stored - * in DAR, but it seems that this doesn't happen in some cases, such - * as when the error is due to a dcbi instruction to a page with a - * TLB that doesn't have the changed bit set. In such cases, there - * does not appear to be any way to recover the EA of the error - * since it is neither in DAR nor MD_EPN. As a workaround, the - * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs - * are initialized in mapin_ram(). This will avoid the problem, - * assuming we only use the dcbi instruction on kernel addresses. - */ mfspr r10, SPRN_DAR - rlwinm r11, r10, 0, 0, 19 - ori r11, r11, MD_EVALID - mfspr r10, SPRN_M_CASID - rlwimi r11, r10, 0, 28, 31 - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r11 - - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - andi. r11, r10, 0x0800 - beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 -3: - lwz r11, 0(r10) /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, bail */ - - /* We have a pte table, so fetch the pte from the table. - */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ - - andi. r11, r10, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail out if not */ - - /* Update 'changed', among others. - */ -#ifdef CONFIG_SWAP - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: -#else - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE -#endif - mfspr r11, SPRN_MD_TWC /* Get pte address again */ - stw r10, 0(r11) /* and update pte in table */ - - /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. - * Software indicator bits 24, 25, 26, and 27 must be - * set. All other Linux PTE bits control the behavior - * of the MMU. - */ - li r11, 0x00f0 - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - mfspr r10, SPRN_M_TW /* Restore registers */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif - rfi -2: + cmpwi cr0, r10, 0x00f0 + beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ +DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ mfspr r10, SPRN_M_TW /* Restore registers */ lwz r11, 0(r0) mtcr r11 @@ -588,6 +533,140 @@ DataTLBError: . = 0x2000 +/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions + * by decoding the registers used by the dcbx instruction and adding them. + * DAR is set to the calculated address and r10 also holds the EA on exit. + */ + /* define if you don't want to use self modifying code */ +#define NO_SELF_MODIFYING_CODE +FixupDAR:/* Entry point for dcbx workaround. */ + /* fetch instruction from memory. */ + mfspr r10, SPRN_SRR0 + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + beq- 3f /* Branch if user space */ + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ +3: lwz r11, 0(r11) /* Get the level 1 entry */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 20, 31 + lwz r11,0(r11) +/* Check if it really is a dcbx instruction. */ +/* dcbt and dcbtst does not generate DTLB Misses/Errors, + * no need to include them here */ + srwi r10, r11, 26 /* check if major OP code is 31 */ + cmpwi cr0, r10, 31 + bne- 141f + rlwinm r10, r11, 0, 21, 30 + cmpwi cr0, r10, 2028 /* Is dcbz? */ + beq+ 142f + cmpwi cr0, r10, 940 /* Is dcbi? */ + beq+ 142f + cmpwi cr0, r10, 108 /* Is dcbst? */ + beq+ 144f /* Fix up store bit! */ + cmpwi cr0, r10, 172 /* Is dcbf? */ + beq+ 142f + cmpwi cr0, r10, 1964 /* Is icbi? */ + beq+ 142f +141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ + b DARFixed /* Nope, go back to normal TLB processing */ + +144: mfspr r10, SPRN_DSISR + rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ + mtspr SPRN_DSISR, r10 +142: /* continue, it was a dcbx, dcbi instruction. */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif +#ifndef NO_SELF_MODIFYING_CODE + andis. r10,r11,0x1f /* test if reg RA is r0 */ + li r10,modified_instr@l + dcbtst r0,r10 /* touch for store */ + rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */ + oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */ + ori r11,r11,532 + stw r11,0(r10) /* store add/and instruction */ + dcbf 0,r10 /* flush new instr. to memory. */ + icbi 0,r10 /* invalidate instr. cache line */ + lwz r11, 4(r0) /* restore r11 from memory */ + mfspr r10, SPRN_M_TW /* restore r10 from M_TW */ + isync /* Wait until new instr is loaded from memory */ +modified_instr: + .space 4 /* this is where the add instr. is stored */ + bne+ 143f + subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ +143: mtdar r10 /* store faulting EA in DAR */ + b DARFixed /* Go back to normal TLB handling */ +#else + mfctr r10 + mtdar r10 /* save ctr reg in DAR */ + rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */ + addi r10, r10, 150f@l /* add start of table */ + mtctr r10 /* load ctr with jump address */ + xor r10, r10, r10 /* sum starts at zero */ + bctr /* jump into table */ +150: + add r10, r10, r0 ;b 151f + add r10, r10, r1 ;b 151f + add r10, r10, r2 ;b 151f + add r10, r10, r3 ;b 151f + add r10, r10, r4 ;b 151f + add r10, r10, r5 ;b 151f + add r10, r10, r6 ;b 151f + add r10, r10, r7 ;b 151f + add r10, r10, r8 ;b 151f + add r10, r10, r9 ;b 151f + mtctr r11 ;b 154f /* r10 needs special handling */ + mtctr r11 ;b 153f /* r11 needs special handling */ + add r10, r10, r12 ;b 151f + add r10, r10, r13 ;b 151f + add r10, r10, r14 ;b 151f + add r10, r10, r15 ;b 151f + add r10, r10, r16 ;b 151f + add r10, r10, r17 ;b 151f + add r10, r10, r18 ;b 151f + add r10, r10, r19 ;b 151f + add r10, r10, r20 ;b 151f + add r10, r10, r21 ;b 151f + add r10, r10, r22 ;b 151f + add r10, r10, r23 ;b 151f + add r10, r10, r24 ;b 151f + add r10, r10, r25 ;b 151f + add r10, r10, r26 ;b 151f + add r10, r10, r27 ;b 151f + add r10, r10, r28 ;b 151f + add r10, r10, r29 ;b 151f + add r10, r10, r30 ;b 151f + add r10, r10, r31 +151: + rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */ + beq 152f /* if reg RA is zero, don't add it */ + addi r11, r11, 150b@l /* add start of table */ + mtctr r11 /* load ctr with jump address */ + rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */ + bctr /* jump into table */ +152: + mfdar r11 + mtctr r11 /* restore ctr reg from DAR */ + mtdar r10 /* save fault EA to DAR */ + b DARFixed /* Go back to normal TLB handling */ + + /* special handling for r10,r11 since these are modified already */ +153: lwz r11, 4(r0) /* load r11 from memory */ + b 155f +154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ +155: add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b +#endif + .globl giveup_fpu giveup_fpu: blr @@ -689,12 +768,12 @@ start_here: */ initial_mmu: tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB +/* Always pin the first 8 MB ITLB to prevent ITLB + misses while mucking around with SRR0/SRR1 in asm +*/ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#else - li r8, 0 -#endif + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ #ifdef CONFIG_PIN_TLB diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 975788ca05d2..25793bb0e782 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -214,11 +214,11 @@ skpinv: addi r6,r6,1 /* Increment */ bl 1f /* Find our address */ 1: mflr r9 rlwimi r7,r9,0,20,31 - addi r7,r7,24 + addi r7,r7,(2f - 1b) mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r6 rfi - +2: /* 4. Clear out PIDs & Search info */ li r6,0 mtspr SPRN_MAS6,r6 @@ -944,28 +944,6 @@ _GLOBAL(__setup_e500mc_ivors) blr /* - * extern void loadcam_entry(unsigned int index) - * - * Load TLBCAM[index] entry in to the L2 CAM MMU - */ -_GLOBAL(loadcam_entry) - lis r4,TLBCAM@ha - addi r4,r4,TLBCAM@l - mulli r5,r3,TLBCAM_SIZE - add r3,r5,r4 - lwz r4,0(r3) - mtspr SPRN_MAS0,r4 - lwz r4,4(r3) - mtspr SPRN_MAS1,r4 - lwz r4,8(r3) - mtspr SPRN_MAS2,r4 - lwz r4,12(r3) - mtspr SPRN_MAS3,r4 - tlbwe - isync - blr - -/* * extern void giveup_altivec(struct task_struct *prev) * * The e500 core does not have an AltiVec unit. diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 88d9c1d5e5fb..049dda60e475 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -110,18 +110,16 @@ int powersave_nap; */ static ctl_table powersave_nap_ctl_table[]={ { - .ctl_name = KERN_PPC_POWERSAVE_NAP, .procname = "powersave-nap", .data = &powersave_nap, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; static ctl_table powersave_nap_sysctl_root[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = powersave_nap_ctl_table, diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 1882bf419fa6..8dc7547c2377 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, dest++; n--; } - while(n > 4) { + while(n >= 4) { *((u32 *)dest) = *((volatile u32 *)vsrc); eieio(); vsrc += 4; @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) vdest++; n--; } - while(n > 4) { + while(n >= 4) { *((volatile u32 *)vdest) = *((volatile u32 *)src); src += 4; vdest += 4; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index fd51578e29dd..5547ae6e6b0b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -30,7 +30,7 @@ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/dma-mapping.h> -#include <linux/bitops.h> +#include <linux/bitmap.h> #include <linux/iommu-helper.h> #include <linux/crash_dump.h> #include <asm/io.h> @@ -251,7 +251,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, } ppc_md.tce_free(tbl, entry, npages); - iommu_area_free(tbl->it_map, free_entry, npages); + bitmap_clear(tbl->it_map, free_entry, npages); } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e5d121177984..64f6f2031c22 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,9 +70,13 @@ #include <asm/firmware.h> #include <asm/lv1call.h> #endif +#define CREATE_TRACE_POINTS +#include <asm/trace.h> + +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); int __irq_offset_value; -static int ppc_spurious_interrupts; #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__irq_offset_value); @@ -85,7 +89,10 @@ extern int tau_interrupts(int); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 + +#ifndef CONFIG_SPARSE_IRQ EXPORT_SYMBOL(irq_desc); +#endif int distribute_irqs = 1; @@ -175,78 +182,135 @@ notrace void raw_local_irq_restore(unsigned long en) EXPORT_SYMBOL(raw_local_irq_restore); #endif /* CONFIG_PPC64 */ +static int show_other_interrupts(struct seq_file *p, int prec) +{ + int j; + +#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) + if (tau_initialized) { + seq_printf(p, "%*s: ", prec, "TAU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", tau_interrupts(j)); + seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); + } +#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); + seq_printf(p, " Local timer interrupts\n"); + + seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_printf(p, " Spurious interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CNT"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_printf(p, " Performance monitoring interrupts\n"); + + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_printf(p, " Machine check exceptions\n"); + + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *)v, j; + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j, prec; struct irqaction *action; struct irq_desc *desc; - unsigned long flags; + if (i > nr_irqs) + return 0; + + for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) + j *= 10; + + if (i == nr_irqs) + return show_other_interrupts(p, prec); + + /* print header */ if (i == 0) { - seq_puts(p, " "); + seq_printf(p, "%*s", prec + 8, ""); for_each_online_cpu(j) - seq_printf(p, "CPU%d ", j); + seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); } - if (i < NR_IRQS) { - desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); - action = desc->action; - if (!action || !action->handler) - goto skip; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ - if (desc->chip) - seq_printf(p, " %s ", desc->chip->typename); - else - seq_puts(p, " None "); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); - seq_printf(p, " %s", action->name); - for (action = action->next; action; action = action->next) + desc = irq_to_desc(i); + if (!desc) + return 0; + + raw_spin_lock_irqsave(&desc->lock, flags); + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%*d: ", prec, i); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + + if (desc->chip) + seq_printf(p, " %-16s", desc->chip->name); + else + seq_printf(p, " %-16s", "None"); + seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); + + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) seq_printf(p, ", %s", action->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == NR_IRQS) { -#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) - if (tau_initialized){ - seq_puts(p, "TAU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); - seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); - } -#endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/ - seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); } + + seq_putc(p, '\n'); +out: + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + + sum += per_cpu(irq_stat, cpu).pmu_irqs; + sum += per_cpu(irq_stat, cpu).mce_exceptions; + sum += per_cpu(irq_stat, cpu).spurious_irqs; + + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(cpumask_t map) { + struct irq_desc *desc; unsigned int irq; static int warned; for_each_irq(irq) { cpumask_t mask; - if (irq_desc[irq].status & IRQ_PER_CPU) + desc = irq_to_desc(irq); + if (desc && desc->status & IRQ_PER_CPU) continue; - cpumask_and(&mask, irq_desc[irq].affinity, &map); + cpumask_and(&mask, desc->affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, &mask); - else if (irq_desc[irq].action && !(warned++)) + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, &mask); + else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } @@ -273,7 +337,7 @@ static inline void handle_one_irq(unsigned int irq) return; } - desc = irq_desc + irq; + desc = irq_to_desc(irq); saved_sp_limit = current->thread.ksp_limit; irqtp->task = curtp->task; @@ -325,6 +389,8 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; + trace_irq_entry(regs); + irq_enter(); check_stack_overflow(); @@ -334,8 +400,7 @@ void do_IRQ(struct pt_regs *regs) if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) - /* That's not SMP safe ... but who cares ? */ - ppc_spurious_interrupts++; + __get_cpu_var(irq_stat).spurious_irqs++; irq_exit(); set_irq_regs(old_regs); @@ -348,6 +413,8 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif + + trace_irq_exit(regs); } void __init init_IRQ(void) @@ -453,7 +520,7 @@ void do_softirq(void) */ static LIST_HEAD(irq_hosts); -static DEFINE_SPINLOCK(irq_big_lock); +static DEFINE_RAW_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); struct irq_map_entry irq_map[NR_IRQS]; @@ -499,14 +566,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (host->ops->match == NULL) host->ops->match = default_irq_host_match; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and * mark it as allocated (we use irq 0 host pointer for that */ if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* If we are early boot, we can't free the structure, * too bad... * this will be fixed once slab is made available early @@ -520,7 +587,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, } list_add(&host->link, &irq_hosts); - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* Additional setups per revmap type */ switch(revmap_type) { @@ -535,7 +602,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); /* Clear norequest flags */ - get_irq_desc(i)->status &= ~IRQ_NOREQUEST; + irq_to_desc(i)->status &= ~IRQ_NOREQUEST; /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to @@ -571,13 +638,13 @@ struct irq_host *irq_find_host(struct device_node *node) * the absence of a device node. This isn't a problem so far * yet though... */ - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) if (h->ops->match(h, node)) { found = h; break; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } EXPORT_SYMBOL_GPL(irq_find_host); @@ -601,8 +668,16 @@ void irq_set_virq_count(unsigned int count) static int irq_setup_virq(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { + struct irq_desc *desc; + + desc = irq_to_desc_alloc_node(virq, 0); + if (!desc) { + pr_debug("irq: -> allocating desc failed\n"); + goto error; + } + /* Clear IRQ_NOREQUEST flag */ - get_irq_desc(virq)->status &= ~IRQ_NOREQUEST; + desc->status &= ~IRQ_NOREQUEST; /* map it */ smp_wmb(); @@ -611,11 +686,14 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, if (host->ops->map(host, virq, hwirq)) { pr_debug("irq: -> mapping failed, freeing\n"); - irq_free_virt(virq, 1); - return -1; + goto error; } return 0; + +error: + irq_free_virt(virq, 1); + return -1; } unsigned int irq_create_direct_mapping(struct irq_host *host) @@ -699,7 +777,7 @@ unsigned int irq_create_mapping(struct irq_host *host, EXPORT_SYMBOL_GPL(irq_create_mapping); unsigned int irq_create_of_mapping(struct device_node *controller, - u32 *intspec, unsigned int intsize) + const u32 *intspec, unsigned int intsize) { struct irq_host *host; irq_hw_number_t hwirq; @@ -732,7 +810,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, /* Set type if specified and different than the current one */ if (type != IRQ_TYPE_NONE && - type != (get_irq_desc(virq)->status & IRQF_TRIGGER_MASK)) + type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) set_irq_type(virq, type); return virq; } @@ -804,7 +882,7 @@ void irq_dispose_mapping(unsigned int virq) irq_map[virq].hwirq = host->inval_irq; /* Set some flags */ - get_irq_desc(virq)->status |= IRQ_NOREQUEST; + irq_to_desc(virq)->status |= IRQ_NOREQUEST; /* Free it */ irq_free_virt(virq, 1); @@ -935,7 +1013,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) return NO_IRQ; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* Use hint for 1 interrupt if any */ if (count == 1 && hint >= NUM_ISA_INTERRUPTS && @@ -959,7 +1037,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, } } if (found == NO_IRQ) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return NO_IRQ; } hint_found: @@ -968,7 +1046,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, smp_wmb(); irq_map[i].host = host; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } @@ -980,7 +1058,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; @@ -993,15 +1071,27 @@ void irq_free_virt(unsigned int virq, unsigned int count) smp_wmb(); irq_map[i].host = NULL; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); } -void irq_early_init(void) +int arch_early_irq_init(void) { - unsigned int i; + struct irq_desc *desc; + int i; + + for (i = 0; i < NR_IRQS; i++) { + desc = irq_to_desc(i); + if (desc) + desc->status |= IRQ_NOREQUEST; + } - for (i = 0; i < NR_IRQS; i++) - get_irq_desc(i)->status |= IRQ_NOREQUEST; + return 0; +} + +int arch_init_chip_data(struct irq_desc *desc, int node) +{ + desc->status |= IRQ_NOREQUEST; + return 0; } /* We need to create the radix trees late */ @@ -1063,16 +1153,19 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", "chip name", "host name"); - for (i = 1; i < NR_IRQS; i++) { - desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); + for (i = 1; i < nr_irqs; i++) { + desc = irq_to_desc(i); + if (!desc) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { seq_printf(m, "%5d ", i); seq_printf(m, "0x%05lx ", virq_to_hw(i)); - if (desc->chip && desc->chip->typename) - p = desc->chip->typename; + if (desc->chip && desc->chip->name) + p = desc->chip->name; else p = none; seq_printf(m, "%-15s ", p); @@ -1084,7 +1177,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%s\n", p); } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } return 0; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index fe8f71dd0b3f..41bada0298c8 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -52,7 +52,7 @@ static struct hard_trap_info { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ - { 0x2060, 0x0e /* SIGILL */ }, /* performace monitor */ + { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ @@ -282,12 +282,6 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { unsigned long *ptr = gdb_regs; int reg; -#ifdef CONFIG_SPE - union { - u32 v32[2]; - u64 v64; - } acc; -#endif for (reg = 0; reg < 32; reg++) UNPACK64(regs->gpr[reg], ptr); @@ -339,7 +333,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcom_in_buffer[0] == 's') { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); linux_regs->msr |= MSR_DE; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c9329786073b..3fd1af902112 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -36,7 +36,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_SINGLESTEP (MSR_DE) #else #define MSR_SINGLESTEP (MSR_SE) @@ -110,7 +110,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) * like Decrementer or External Interrupt */ regs->msr &= ~MSR_EE; regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); #endif diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef1a184..035ada5443ee 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. */ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index ed0ac4e4b8d8..d09d1c615150 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -359,7 +359,7 @@ static void parse_system_parameter_string(struct seq_file *m) unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); return; } @@ -383,13 +383,13 @@ static void parse_system_parameter_string(struct seq_file *m) int idx, w_idx; char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); kfree(local_buffer); return; } #ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter \n"); + printk(KERN_INFO "success calling get-system-parameter\n"); #endif splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; local_buffer += 2; /* step over strlen value */ @@ -440,7 +440,7 @@ static int lparcfg_count_active_processors(void) while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { #ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p \n", cpus_dn); + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); #endif count++; } @@ -725,7 +725,7 @@ static int lparcfg_data(struct seq_file *m, void *v) const unsigned int *lp_index_ptr; unsigned int lp_index = 0; - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); rootdn = of_find_node_by_path("/"); if (rootdn) { @@ -781,9 +781,9 @@ static int __init lparcfg_init(void) !firmware_has_feature(FW_FEATURE_ISERIES)) mode |= S_IWUSR; - ent = proc_create("ppc64/lparcfg", mode, NULL, &lparcfg_fops); + ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); if (!ent) { - printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index da9c0c4c10f3..8649f536f8df 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -502,15 +502,7 @@ _GLOBAL(clear_pages) li r0,PAGE_SIZE/L1_CACHE_BYTES slw r0,r0,r4 mtctr r0 -#ifdef CONFIG_8xx - li r4, 0 -1: stw r4, 0(r3) - stw r4, 4(r3) - stw r4, 8(r3) - stw r4, 12(r3) -#else 1: dcbz 0,r3 -#endif addi r3,r3,L1_CACHE_BYTES bdnz 1b blr @@ -535,15 +527,6 @@ _GLOBAL(copy_page) addi r3,r3,-4 addi r4,r4,-4 -#ifdef CONFIG_8xx - /* don't use prefetch on 8xx */ - li r0,4096/L1_CACHE_BYTES - mtctr r0 -1: COPY_16_BYTES - bdnz 1b - blr - -#else /* not 8xx, we can prefetch */ li r5,4 #if MAX_COPY_PREFETCH > 1 @@ -584,7 +567,6 @@ _GLOBAL(copy_page) li r0,MAX_COPY_PREFETCH li r11,4 b 2b -#endif /* CONFIG_8xx */ /* * void atomic_clear_mask(atomic_t mask, atomic_t *addr) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 0ed31f220482..9cf197f01e94 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -139,8 +139,8 @@ out: } -static int dev_nvram_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long dev_nvram_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { switch(cmd) { #ifdef CONFIG_PPC_PMAC @@ -169,11 +169,11 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, } const struct file_operations nvram_fops = { - .owner = THIS_MODULE, - .llseek = dev_nvram_llseek, - .read = dev_nvram_read, - .write = dev_nvram_write, - .ioctl = dev_nvram_ioctl, + .owner = THIS_MODULE, + .llseek = dev_nvram_llseek, + .read = dev_nvram_read, + .write = dev_nvram_write, + .unlocked_ioctl = dev_nvram_ioctl, }; static struct miscdevice nvram_dev = { @@ -184,7 +184,7 @@ static struct miscdevice nvram_dev = { #ifdef DEBUG_NVRAM -static void nvram_print_partitions(char * label) +static void __init nvram_print_partitions(char * label) { struct list_head * p; struct nvram_partition * tmp_part; @@ -202,7 +202,7 @@ static void nvram_print_partitions(char * label) #endif -static int nvram_write_header(struct nvram_partition * part) +static int __init nvram_write_header(struct nvram_partition * part) { loff_t tmp_index; int rc; @@ -214,7 +214,7 @@ static int nvram_write_header(struct nvram_partition * part) } -static unsigned char nvram_checksum(struct nvram_header *p) +static unsigned char __init nvram_checksum(struct nvram_header *p) { unsigned int c_sum, c_sum2; unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ @@ -228,32 +228,7 @@ static unsigned char nvram_checksum(struct nvram_header *p) return c_sum; } - -/* - * Find an nvram partition, sig can be 0 for any - * partition or name can be NULL for any name, else - * tries to match both - */ -struct nvram_partition *nvram_find_partition(int sig, const char *name) -{ - struct nvram_partition * part; - struct list_head * p; - - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); - - if (sig && part->header.signature != sig) - continue; - if (name && 0 != strncmp(name, part->header.name, 12)) - continue; - return part; - } - return NULL; -} -EXPORT_SYMBOL(nvram_find_partition); - - -static int nvram_remove_os_partition(void) +static int __init nvram_remove_os_partition(void) { struct list_head *i; struct list_head *j; @@ -319,7 +294,7 @@ static int nvram_remove_os_partition(void) * Will create a partition starting at the first free * space found if space has enough room. */ -static int nvram_create_os_partition(void) +static int __init nvram_create_os_partition(void) { struct nvram_partition *part; struct nvram_partition *new_part; @@ -363,8 +338,8 @@ static int nvram_create_os_partition(void) rc = nvram_write_header(new_part); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \ - failed (%d)\n", rc); + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); return rc; } @@ -374,7 +349,7 @@ static int nvram_create_os_partition(void) rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); if (rc <= 0) { printk(KERN_ERR "nvram_create_os_partition: nvram_write " - "failed (%d)\n", rc); + "failed (%d)\n", rc); return rc; } @@ -422,7 +397,7 @@ static int nvram_create_os_partition(void) * 5.) If the max chunk cannot be allocated then try finding a chunk * that will satisfy the minum needed (NVRAM_MIN_REQ). */ -static int nvram_setup_partition(void) +static int __init nvram_setup_partition(void) { struct list_head * p; struct nvram_partition * part; @@ -480,7 +455,7 @@ static int nvram_setup_partition(void) } -static int nvram_scan_partitions(void) +static int __init nvram_scan_partitions(void) { loff_t cur_index = 0; struct nvram_header phead; @@ -706,6 +681,9 @@ int nvram_clear_error_log(void) int clear_word = ERR_FLAG_ALREADY_LOGGED; int rc; + if (nvram_error_log_index == -1) + return -1; + tmp_index = nvram_error_log_index; rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 1a4fc0d11a03..666d08db319e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -214,7 +214,7 @@ EXPORT_SYMBOL(of_find_device_by_node); static int of_dev_phandle_match(struct device *dev, void *data) { phandle *ph = data; - return to_of_device(dev)->node->linux_phandle == *ph; + return to_of_device(dev)->node->phandle == *ph; } struct of_device *of_find_device_by_phandle(phandle ph) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d16b1ea55d44..0c40c6f476fe 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,11 +9,15 @@ #include <linux/threads.h> #include <linux/module.h> +#include <linux/lmb.h> +#include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/iseries/lpar_map.h> +#include <asm/iseries/hv_types.h> /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -70,37 +74,82 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct paca[NR_CPUS]; +struct paca_struct *paca; EXPORT_SYMBOL(paca); -void __init initialise_pacas(void) -{ - int cpu; +struct paca_struct boot_paca; - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct paca_struct *new_paca = &paca[cpu]; - #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = &lppaca[cpu]; #else - new_paca->kernel_pgd = swapper_pg_dir; + new_paca->kernel_pgd = swapper_pg_dir; #endif - new_paca->lock_token = 0x8000; - new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; - new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; - new_paca->hw_cpu_id = 0xffff; - new_paca->__current = &init_task; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int nr_cpus, cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so allocate them within the RMA and also within + * the first segment. On iSeries they must be within the area mapped + * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + */ + limit = min(0x10000000ULL, lmb.rmo_size); + if (firmware_has_feature(FW_FEATURE_ISERIES)) + limit = min(limit, HvPagesToMap * HVPAGESIZE); + + nr_cpus = NR_CPUS; + /* On iSeries we know we can never have more than 64 cpus */ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + nr_cpus = min(64, nr_cpus); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + + paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpus, paca); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpus; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + + if (new_size >= paca_size) + return; + + lmb_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); - } + paca_size = new_size; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bb8209e34931..f3c42ce516e7 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -63,21 +63,6 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - int rc; - - rc = dma_set_mask(&dev->dev, mask); - dev->dev.coherent_dma_mask = dev->dma_mask; - - return rc; -} - struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -1047,10 +1032,8 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) struct pci_dev *dev = bus->self; - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL) - continue; - if (!res->flags) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags) continue; if (i >= 3 && bus->self->transparent) continue; @@ -1107,6 +1090,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1136,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && @@ -1168,21 +1164,20 @@ static int skip_isa_ioresource_align(struct pci_dev *dev) * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. */ -void pcibios_align_resource(void *data, struct resource *res, +resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - if (skip_isa_ioresource_align(dev)) - return; - if (start & 0x300) { + return start; + if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } } + + return start; } EXPORT_SYMBOL(pcibios_align_resource); @@ -1190,7 +1185,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * Reparent resource children of pr that conflict with res * under res, and make res replace those children. */ -static int __init reparent_resources(struct resource *parent, +static int reparent_resources(struct resource *parent, struct resource *res) { struct resource *p, **pp; @@ -1265,9 +1260,8 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pr_debug("PCI: Allocating bus resources for %04x:%02x...\n", pci_domain_nr(bus), bus->number); - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL || !res->flags - || res->start > res->end || res->parent) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags || res->start > res->end || res->parent) continue; if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ba949a2c93ac..d43fc65749c1 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -97,7 +97,9 @@ int pcibios_unmap_io_space(struct pci_bus *bus) * to do an appropriate TLB flush here too */ if (bus->self) { +#ifdef CONFIG_PPC_STD_MMU_64 struct resource *res = bus->resource[0]; +#endif pr_debug("IO unmapping for PCI-PCI bridge %s\n", pci_name(bus->self)); @@ -222,7 +224,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * G5 machines... So when something asks for bus 0 io base * (bus 0 is HT root), we return the AGP one instead. */ - if (in_bus == 0 && machine_is_compatible("MacRISC4")) { + if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) { struct device_node *agp; agp = of_find_compatible_node(NULL, NULL, "u3-agp"); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 7311fdfb9bf8..cd11d5ca80df 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -123,6 +123,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, { struct pci_dev *dev; const char *type; + struct pci_slot *slot; dev = alloc_pci_dev(); if (!dev) @@ -140,6 +141,11 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? */ dev->needs_freset = 0; /* pcie fundamental reset required */ + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); @@ -160,10 +166,14 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* Early fixups, before probing the BARs */ + pci_fixup_device(pci_fixup_early, dev); + if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; dev->rom_base_reg = PCI_ROM_ADDRESS1; + set_pcie_hotplug_bridge(dev); } else if (!strcmp(type, "cardbus")) { dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; } else { @@ -294,7 +304,7 @@ static void __devinit __of_scan_bus(struct device_node *node, int reglen, devfn; struct pci_dev *dev; - pr_debug("of_scan_bus(%s) bus no %d... \n", + pr_debug("of_scan_bus(%s) bus no %d...\n", node->full_name, bus->number); /* Scan direct children */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 0a03cf70d247..95ad9dad298e 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -119,13 +119,6 @@ static void perf_callchain_kernel(struct pt_regs *regs, } #ifdef CONFIG_PPC64 - -#ifdef CONFIG_HUGETLB_PAGE -#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) -#else -#define is_huge_psize(pagesize) 0 -#endif - /* * On 64-bit we don't want to invoke hash_page on user addresses from * interrupt context, so if the access faults, we read the page tables @@ -135,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb) { pgd_t *pgdir; pte_t *ptep, pte; - int pagesize; + unsigned shift; unsigned long addr = (unsigned long) ptr; unsigned long offset; unsigned long pfn; @@ -145,17 +138,14 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb) if (!pgdir) return -EFAULT; - pagesize = get_slice_psize(current->mm, addr); + ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); + if (!shift) + shift = PAGE_SHIFT; /* align address to page boundary */ - offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + offset = addr & ((1UL << shift) - 1); addr -= offset; - if (is_huge_psize(pagesize)) - ptep = huge_pte_offset(current->mm, addr); - else - ptep = find_linux_pte(pgdir, addr); - if (ptep == NULL) return -EFAULT; pte = *ptep; @@ -497,17 +487,14 @@ static void perf_callchain_user_32(struct pt_regs *regs, * Since we can't get PMU interrupts inside a PMU interrupt handler, * we don't need separate irq and nmi entries here. */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); +static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); entry->nr = 0; - if (current->pid == 0) /* idle task? */ - return entry; - if (!user_mode(regs)) { perf_callchain_kernel(regs, entry); if (current->mm) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index bbcbae183e92..08460a2e9f41 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -116,20 +116,23 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) static inline u32 perf_get_misc_flags(struct pt_regs *regs) { unsigned long mmcra = regs->dsisr; + unsigned long sihv = MMCRA_SIHV; + unsigned long sipr = MMCRA_SIPR; if (TRAP(regs) != 0xf00) return 0; /* not a PMU interrupt */ if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_RECORD_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? - PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + sihv = POWER6_MMCRA_SIHV; + sipr = POWER6_MMCRA_SIPR; } - if (mmcra & MMCRA_SIHV) + + /* PR has priority over HV, so order below is important */ + if (mmcra & sipr) + return PERF_RECORD_MISC_USER; + if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV)) return PERF_RECORD_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER : - PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_KERNEL; } /* @@ -715,10 +718,10 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static void event_sched_in(struct perf_event *event, int cpu) +static void event_sched_in(struct perf_event *event) { event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = cpu; + event->oncpu = smp_processor_id(); event->tstamp_running += event->ctx->time - event->tstamp_stopped; if (is_software_event(event)) event->pmu->enable(event); @@ -732,7 +735,7 @@ static void event_sched_in(struct perf_event *event, int cpu) */ int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, int cpu) + struct perf_event_context *ctx) { struct cpu_hw_events *cpuhw; long i, n, n0; @@ -763,10 +766,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader, cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuctx->active_oncpu += n; n = 1; - event_sched_in(group_leader, cpu); + event_sched_in(group_leader); list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { if (sub->state != PERF_EVENT_STATE_OFF) { - event_sched_in(sub, cpu); + event_sched_in(sub); ++n; } } @@ -1161,10 +1164,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .addr = 0, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); @@ -1284,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -void hw_perf_event_setup(int cpu) +static void power_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -1294,6 +1297,23 @@ void hw_perf_event_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +static int __cpuinit +power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + power_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + int register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -1311,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_cpu_notifier(power_pmu_notifier); + return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 000000000000..369872f6cf78 --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -0,0 +1,654 @@ +/* + * Performance event support - Freescale Embedded Performance Monitor + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/reg_fsl_emb.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +struct cpu_hw_events { + int n_events; + int disabled; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct fsl_emb_pmu *ppmu; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ +#ifdef __powerpc64__ + return !regs->softe; +#else + return 0; +#endif +} + +static void perf_event_interrupt(struct pt_regs *regs); + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 0: + val = mfpmr(PMRN_PMC0); + break; + case 1: + val = mfpmr(PMRN_PMC1); + break; + case 2: + val = mfpmr(PMRN_PMC2); + break; + case 3: + val = mfpmr(PMRN_PMC3); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } + + isync(); +} + +/* + * Write one local control A register + */ +static void write_pmlca(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCA0, val); + break; + case 1: + mtpmr(PMRN_PMLCA1, val); + break; + case 2: + mtpmr(PMRN_PMLCA2, val); + break; + case 3: + mtpmr(PMRN_PMLCA3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); + } + + isync(); +} + +/* + * Write one local control B register + */ +static void write_pmlcb(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCB0, val); + break; + case 1: + mtpmr(PMRN_PMLCB1, val); + break; + case 2: + mtpmr(PMRN_PMLCB2, val); + break; + case 3: + mtpmr(PMRN_PMLCB3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); + } + + isync(); +} + +static void fsl_emb_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + if (atomic_read(&num_events)) { + /* + * Set the 'freeze all counters' bit, and disable + * interrupts. The barrier is to make sure the + * mtpmr has been executed and the PMU has frozen + * the events before we return. + */ + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) + goto out; + + cpuhw->disabled = 0; + ppc_set_pmu_inuse(cpuhw->n_events != 0); + + if (cpuhw->n_events > 0) { + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[]) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + n++; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + n++; + } + } + return n; +} + +/* perf must be disabled, context locked on entry */ +static int fsl_emb_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int ret = -EAGAIN; + int num_counters = ppmu->n_counter; + u64 val; + int i; + + cpuhw = &get_cpu_var(cpu_hw_events); + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_counters = ppmu->n_restricted; + + /* + * Allocate counters from top-down, so that restricted-capable + * counters are kept free as long as possible. + */ + for (i = num_counters - 1; i >= 0; i--) { + if (cpuhw->event[i]) + continue; + + break; + } + + if (i < 0) + goto out; + + event->hw.idx = i; + cpuhw->event[i] = event; + ++cpuhw->n_events; + + val = 0; + if (event->hw.sample_period) { + s64 left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + write_pmc(i, val); + perf_event_update_userpage(event); + + write_pmlcb(i, event->hw.config >> 32); + write_pmlca(i, event->hw.config_base); + + ret = 0; + out: + put_cpu_var(cpu_hw_events); + return ret; +} + +/* perf must be disabled, context locked on entry */ +static void fsl_emb_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int i = event->hw.idx; + + if (i < 0) + goto out; + + fsl_emb_pmu_read(event); + + cpuhw = &get_cpu_var(cpu_hw_events); + + WARN_ON(event != cpuhw->event[event->hw.idx]); + + write_pmlca(i, 0); + write_pmlcb(i, 0); + write_pmc(i, 0); + + cpuhw->event[i] = NULL; + event->hw.idx = -1; + + /* + * TODO: if at least one restricted event exists, and we + * just freed up a non-restricted-capable counter, and + * there is a restricted-capable counter occupied by + * a non-restricted event, migrate that event to the + * vacated counter. + */ + + cpuhw->n_events--; + + out: + put_cpu_var(cpu_hw_events); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + * + * Context is locked on entry, but perf is not disabled. + */ +static void fsl_emb_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + fsl_emb_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +static struct pmu fsl_emb_pmu = { + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + struct perf_event *events[MAX_HWEVENTS]; + int n; + int err; + int num_restricted; + int i; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + + default: + return ERR_PTR(-EINVAL); + } + + event->hw.config = ppmu->xlate_event(ev); + if (!(event->hw.config & FSL_EMB_EVENT_VALID)) + return ERR_PTR(-EINVAL); + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + ppmu->n_counter - 1, events); + if (n < 0) + return ERR_PTR(-EINVAL); + } + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { + num_restricted = 0; + for (i = 0; i < n; i++) { + if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_restricted++; + } + + if (num_restricted >= ppmu->n_restricted) + return ERR_PTR(-EINVAL); + } + + event->hw.idx = -1; + + event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | + (u32)((ev << 16) & PMLCA_EVENT_MASK); + + if (event->attr.exclude_user) + event->hw.config_base |= PMLCA_FCU; + if (event->attr.exclude_kernel) + event->hw.config_base |= PMLCA_FCS; + if (event->attr.exclude_idle) + return ERR_PTR(-ENOTSUPP); + + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &fsl_emb_pmu; +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .period = event->hw.last_period, + }; + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < ppmu->n_counter; ++i) { + event = cpuhw->event[i]; + + val = read_pmc(i); + if ((int)val < 0) { + if (event) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } else { + /* + * Disabled counter is negative, + * reset it just in case. + */ + write_pmc(i, 0); + } + } + } + + /* PMM will keep counters frozen until we return from the interrupt. */ + mtmsr(mfmsr() | MSR_PMM); + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); +} + +int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + + return 0; +} diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 0516e2d3e02e..461499b43cff 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -37,7 +37,7 @@ static void dummy_perf(struct pt_regs *regs) } -static DEFINE_SPINLOCK(pmc_owner_lock); +static DEFINE_RAW_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; @@ -45,7 +45,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) { int err = 0; - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); if (pmc_owner_caller) { printk(KERN_WARNING "reserve_pmc_hardware: " @@ -59,21 +59,21 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); return err; } EXPORT_SYMBOL_GPL(reserve_pmc_hardware); void release_pmc_hardware(void) { - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); WARN_ON(! pmc_owner_caller); pmc_owner_caller = NULL; perf_irq = dummy_perf; - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 0f4c1c73a6ad..199de527d411 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index c351b3a57fbb..98b6a729a9dd 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ca399ba5034c..84a607bda8fb 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 28a4daacdc02..852f7b7f6b40 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -51,10 +51,6 @@ #define MMCR1_PMCSEL_MSK 0xff /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 479574413a93..8eff48e20dba 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = { }; /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index c8b27bb4dbde..ab3e392ac63c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -96,8 +96,6 @@ EXPORT_SYMBOL(copy_4K_page); EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(isa_mem_base); EXPORT_SYMBOL(pci_dram_offset); -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); #endif /* CONFIG_PCI */ EXPORT_SYMBOL(start_thread); @@ -109,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX EXPORT_SYMBOL(giveup_vsx); +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE EXPORT_SYMBOL(giveup_spe); @@ -162,7 +161,6 @@ EXPORT_SYMBOL(screen_info); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(cacheable_memcpy); EXPORT_SYMBOL(cacheable_memzero); diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_powerpc.c index c647ddef40dc..1ed3b8d7981e 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -28,55 +28,7 @@ #include <asm/uaccess.h> #include <asm/prom.h> -static loff_t page_map_seek( struct file *file, loff_t off, int whence); -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos); -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); - -static const struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap -}; - -/* - * Create the ppc64 and ppc64/rtas directories early. This allows us to - * assume that they have been previously created in drivers. - */ -static int __init proc_ppc64_create(void) -{ - struct proc_dir_entry *root; - - root = proc_mkdir("ppc64", NULL); - if (!root) - return 1; - - if (!of_find_node_by_path("/rtas")) - return 0; - - if (!proc_mkdir("rtas", root)) - return 1; - - if (!proc_symlink("rtas", NULL, "ppc64/rtas")) - return 1; - - return 0; -} -core_initcall(proc_ppc64_create); - -static int __init proc_ppc64_init(void) -{ - struct proc_dir_entry *pde; - - pde = proc_create_data("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL, - &page_map_fops, vdso_data); - if (!pde) - return 1; - pde->size = PAGE_SIZE; - - return 0; -} -__initcall(proc_ppc64_init); +#ifdef CONFIG_PPC64 static loff_t page_map_seek( struct file *file, loff_t off, int whence) { @@ -120,3 +72,55 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return 0; } +static const struct file_operations page_map_fops = { + .llseek = page_map_seek, + .read = page_map_read, + .mmap = page_map_mmap +}; + + +static int __init proc_ppc64_init(void) +{ + struct proc_dir_entry *pde; + + pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL, + &page_map_fops, vdso_data); + if (!pde) + return 1; + pde->size = PAGE_SIZE; + + return 0; +} +__initcall(proc_ppc64_init); + +#endif /* CONFIG_PPC64 */ + +/* + * Create the ppc64 and ppc64/rtas directories early. This allows us to + * assume that they have been previously created in drivers. + */ +static int __init proc_ppc64_create(void) +{ + struct proc_dir_entry *root; + + root = proc_mkdir("powerpc", NULL); + if (!root) + return 1; + +#ifdef CONFIG_PPC64 + if (!proc_symlink("ppc64", NULL, "powerpc")) + pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n"); +#endif + + if (!of_find_node_by_path("/rtas")) + return 0; + + if (!proc_mkdir("rtas", root)) + return 1; + + if (!proc_symlink("rtas", NULL, "powerpc/rtas")) + return 1; + + return 0; +} +core_initcall(proc_ppc64_create); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1168c5f440ab..e4d71ced97ef 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -245,6 +245,24 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +void do_send_trap(struct pt_regs *regs, unsigned long address, + unsigned long error_code, int signal_code, int breakpt) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = breakpt; /* breakpoint or watchpoint id */ + info.si_code = signal_code; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ void do_dabr(struct pt_regs *regs, unsigned long address, unsigned long error_code) { @@ -257,12 +275,6 @@ void do_dabr(struct pt_regs *regs, unsigned long address, if (debugger_dabr_match(regs)) return; - /* Clear the DAC and struct entries. One shot trigger */ -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W - | DBCR0_IDM)); -#endif - /* Clear the DABR */ set_dabr(0); @@ -273,9 +285,82 @@ void do_dabr(struct pt_regs *regs, unsigned long address, info.si_addr = (void __user *)address; force_sig_info(SIGTRAP, &info, current); } +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ static DEFINE_PER_CPU(unsigned long, current_dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +/* + * Set the debug registers back to their default "safe" values. + */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + thread->iac1 = thread->iac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + thread->iac3 = thread->iac4 = 0; +#endif + thread->dac1 = thread->dac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + thread->dvc1 = thread->dvc2 = 0; +#endif + thread->dbcr0 = 0; +#ifdef CONFIG_BOOKE + /* + * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) + */ + thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + DBCR1_IAC3US | DBCR1_IAC4US; + /* + * Force Data Address Compare User/Supervisor bits to be User-only + * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. + */ + thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#else + thread->dbcr1 = 0; +#endif +} + +static void prime_debug_regs(struct thread_struct *thread) +{ + mtspr(SPRN_IAC1, thread->iac1); + mtspr(SPRN_IAC2, thread->iac2); +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + mtspr(SPRN_IAC3, thread->iac3); + mtspr(SPRN_IAC4, thread->iac4); +#endif + mtspr(SPRN_DAC1, thread->dac1); + mtspr(SPRN_DAC2, thread->dac2); +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + mtspr(SPRN_DVC1, thread->dvc1); + mtspr(SPRN_DVC2, thread->dvc2); +#endif + mtspr(SPRN_DBCR0, thread->dbcr0); + mtspr(SPRN_DBCR1, thread->dbcr1); +#ifdef CONFIG_BOOKE + mtspr(SPRN_DBCR2, thread->dbcr2); +#endif +} +/* + * Unless neither the old or new thread are making use of the + * debug registers, set the debug registers from the values + * stored in the new thread. + */ +static void switch_booke_debug_regs(struct thread_struct *new_thread) +{ + if ((current->thread.dbcr0 & DBCR0_IDM) + || (new_thread->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_thread); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + if (thread->dabr) { + thread->dabr = 0; + set_dabr(0); + } +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + int set_dabr(unsigned long dabr) { __get_cpu_var(current_dabr) = dabr; @@ -284,7 +369,7 @@ int set_dabr(unsigned long dabr) return ppc_md.set_dabr(dabr); /* XXX should we have a CPU_FTR_HAS_DABR ? */ -#if defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DAC1, dabr); #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); @@ -371,10 +456,8 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ -#if defined(CONFIG_BOOKE) - /* If new thread DAC (HW breakpoint) is the same then leave it */ - if (new->thread.dabr) - set_dabr(new->thread.dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread); #else if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); @@ -514,7 +597,7 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); @@ -554,28 +637,9 @@ void exit_thread(void) void flush_thread(void) { -#ifdef CONFIG_PPC64 - struct thread_info *t = current_thread_info(); - - if (test_ti_thread_flag(t, TIF_ABI_PENDING)) { - clear_ti_thread_flag(t, TIF_ABI_PENDING); - if (test_ti_thread_flag(t, TIF_32BIT)) - clear_ti_thread_flag(t, TIF_32BIT); - else - set_ti_thread_flag(t, TIF_32BIT); - } -#endif - discard_lazy_cpu_state(); - if (current->thread.dabr) { - current->thread.dabr = 0; - set_dabr(0); - -#if defined(CONFIG_BOOKE) - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); -#endif - } + set_debug_reg_defaults(¤t->thread); } void @@ -1016,9 +1080,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) #ifdef CONFIG_FUNCTION_GRAPH_TRACER int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); - unsigned long addr = (unsigned long)return_to_handler; + unsigned long rth = (unsigned long)return_to_handler; + unsigned long mrth = -1; #ifdef CONFIG_PPC64 - addr = *(unsigned long*)addr; + extern void mod_return_to_handler(void); + rth = *(unsigned long *)rth; + mrth = (unsigned long)mod_return_to_handler; + mrth = *(unsigned long *)mrth; #endif #endif @@ -1044,7 +1112,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ip == addr && curr_frame >= 0) { + if ((ip == rth || ip == mrth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; @@ -1168,7 +1236,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) unsigned long base = mm->brk; unsigned long ret; -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_STD_MMU_64 /* * If we are using 1TB segments and we are allowed to randomise * the heap, we can put it above 1TB so it is backed by a 1TB diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d4405b95bfaa..05131d634e73 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> +#include <asm/paca.h> #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> @@ -61,365 +62,12 @@ #define DBG(fmt...) #endif - -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; - #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; #endif -typedef u32 cell_t; - -#if 0 -static struct boot_param_header *initial_boot_params __initdata; -#else -struct boot_param_header *initial_boot_params; -#endif - -extern struct device_node *allnodes; /* temporary while merging */ - -extern rwlock_t devtree_lock; /* temporary while merging */ - -/* export that to outside world */ -struct device_node *of_chosen; - -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - -/** - * This function is used to scan the flattened device-tree, it is - * used to extract the memory informations at boot before we can - * unflatten the tree - */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - int rc = 0; - int depth = -1; - - do { - u32 tag = *((u32 *)p); - char *pathp; - - p += 4; - if (tag == OF_DT_END_NODE) { - depth --; - continue; - } - if (tag == OF_DT_NOP) - continue; - if (tag == OF_DT_END) - break; - if (tag == OF_DT_PROP) { - u32 sz = *((u32 *)p); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - p += sz; - p = _ALIGN(p, 4); - continue; - } - if (tag != OF_DT_BEGIN_NODE) { - printk(KERN_WARNING "Invalid tag %x scanning flattened" - " device tree !\n", tag); - return -EINVAL; - } - depth++; - pathp = (char *)p; - p = _ALIGN(p + strlen(pathp) + 1, 4); - if ((*pathp) == '/') { - char *lp, *np; - for (lp = NULL, np = pathp; *np; np++) - if ((*np) == '/') - lp = np+1; - if (lp != NULL) - pathp = lp; - } - rc = it(p, pathp, depth, data); - if (rc != 0) - break; - } while(1); - - return rc; -} - -unsigned long __init of_get_flat_dt_root(void) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - - while(*((u32 *)p) == OF_DT_NOP) - p += 4; - BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); - p += 4; - return _ALIGN(p + strlen((char *)p) + 1, 4); -} - -/** - * This function can be used within scan_flattened_dt callback to get - * access to properties - */ -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) -{ - unsigned long p = node; - - do { - u32 tag = *((u32 *)p); - u32 sz, noff; - const char *nstr; - - p += 4; - if (tag == OF_DT_NOP) - continue; - if (tag != OF_DT_PROP) - return NULL; - - sz = *((u32 *)p); - noff = *((u32 *)(p + 4)); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - - nstr = find_flat_dt_string(noff); - if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index" - " name !\n"); - return NULL; - } - if (strcmp(name, nstr) == 0) { - if (size) - *size = sz; - return (void *)p; - } - p += sz; - p = _ALIGN(p, 4); - } while(1); -} - -int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) -{ - const char* cp; - unsigned long cplen, l; - - cp = of_get_flat_dt_prop(node, "compatible", &cplen); - if (cp == NULL) - return 0; - while (cplen > 0) { - if (strncasecmp(cp, compat, strlen(compat)) == 0) - return 1; - l = strlen(cp) + 1; - cp += l; - cplen -= l; - } - - return 0; -} - -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, (int)strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == 0) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while(1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk("Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = pp + 1; - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, - (char *)pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - - if (!np->name) - np->name = "<NULL>"; - if (!np->type) - np->type = "<NULL>"; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - static int __init early_parse_mem(char *p) { if (!p) @@ -446,7 +94,7 @@ static void __init move_device_tree(void) DBG("-> move_device_tree\n"); start = __pa(initial_boot_params); - size = initial_boot_params->totalsize; + size = be32_to_cpu(initial_boot_params->totalsize); if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { @@ -459,54 +107,6 @@ static void __init move_device_tree(void) DBG("<- move_device_tree\n"); } -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - - DBG(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - DBG(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - mem = (unsigned long) __va(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - DBG(" unflattening %lx...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - if (of_chosen == NULL) - of_chosen = of_find_node_by_path("/chosen@0"); - - DBG(" <- unflatten_device_tree()\n"); -} - /* * ibm,pa-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up @@ -763,48 +363,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init early_init_dt_check_for_initrd(unsigned long node) -{ - unsigned long l; - u32 *prop; - - DBG("Looking for initrd properties... "); - - prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); - if (prop) { - initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4)); - - prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); - if (prop) { - initrd_end = (unsigned long) - __va(of_read_ulong(prop, l/4)); - initrd_below_start_ok = 1; - } else { - initrd_start = 0; - } - } - - DBG("initrd_start=0x%lx initrd_end=0x%lx\n", initrd_start, initrd_end); -} -#else -static inline void early_init_dt_check_for_initrd(unsigned long node) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) +void __init early_init_dt_scan_chosen_arch(unsigned long node) { unsigned long *lprop; - unsigned long l; - char *p; - - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) - return 0; #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -815,17 +376,17 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #endif /* mem=x on the command line is the preferred mechanism */ - lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); - if (lprop) - memory_limit = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (lprop) + memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); - if (lprop) - tce_alloc_start = *lprop; - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); - if (lprop) - tce_alloc_end = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (lprop) + tce_alloc_start = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (lprop) + tce_alloc_end = *lprop; #endif #ifdef CONFIG_KEXEC @@ -837,51 +398,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif - - early_init_dt_check_for_initrd(node); - - /* Retreive command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); - -#ifdef CONFIG_CMDLINE - if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - DBG("Command line is: %s\n", cmd_line); - - /* break now */ - return 1; -} - -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - -static u64 __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - - *cellp = p + s; - return of_read_number(p, s); } #ifdef CONFIG_PPC_PSERIES @@ -893,22 +409,22 @@ static u64 __init dt_mem_next_cell(int s, cell_t **cellp) */ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { - cell_t *dm, *ls, *usm; + __be32 *dm, *ls, *usm; unsigned long l, n, flags; u64 base, size, lmb_size; unsigned int is_kexec_kdump = 0, rngs; ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t)) + if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 0; lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls); dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(cell_t)) + if (dm == NULL || l < sizeof(__be32)) return 0; n = *dm++; /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t)) + if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) return 0; /* check if this is a kexec/kdump kernel. */ @@ -963,65 +479,47 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) #define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory(unsigned long node, - const char *uname, int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(unsigned long node, + const char *uname, + int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - cell_t *reg, *endp; - unsigned long l; - - /* Look for the ibm,dynamic-reconfiguration-memory node */ if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) return early_init_dt_scan_drconf_memory(node); + + return early_init_dt_scan_memory(node, uname, depth, data); +} - /* We are scanning "memory" nodes only */ - if (type == NULL) { - /* - * The longtrail doesn't have a device_type on the - * /memory node, so look for the node called /memory@0. - */ - if (depth != 1 || strcmp(uname, "memory@0") != 0) - return 0; - } else if (strcmp(type, "memory") != 0) - return 0; - - reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l); - if (reg == NULL) - reg = of_get_flat_dt_prop(node, "reg", &l); - if (reg == NULL) - return 0; - - endp = reg + (l / sizeof(cell_t)); - - DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", - uname, l, reg[0], reg[1], reg[2], reg[3]); - - while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { - u64 base, size; +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ +#if defined(CONFIG_PPC64) + if (iommu_is_off) { + if (base >= 0x80000000ul) + return; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; + } +#endif - base = dt_mem_next_cell(dt_root_addr_cells, ®); - size = dt_mem_next_cell(dt_root_size_cells, ®); + lmb_add(base, size); - if (size == 0) - continue; - DBG(" - %llx , %llx\n", (unsigned long long)base, - (unsigned long long)size); -#ifdef CONFIG_PPC64 - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } -#endif - lmb_add(base, size); + memstart_addr = min((u64)memstart_addr, base); +} - memstart_addr = min((u64)memstart_addr, base); - } +u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return lmb_alloc(size, align); +} - return 0; +#ifdef CONFIG_BLK_DEV_INITRD +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; } +#endif static void __init early_reserve_mem(void) { @@ -1186,7 +684,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild LMBs */ lmb_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); @@ -1224,6 +722,8 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); + allocate_pacas(); + DBG("Scanning CPUs ...\n"); /* Retreive CPU related informations from the flat tree @@ -1234,25 +734,6 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } - -/** - * Indicates whether the root node has a given value in its - * compatible property. - */ -int machine_is_compatible(const char *compat) -{ - struct device_node *root; - int rc = 0; - - root = of_find_node_by_path("/"); - if (root) { - rc = of_device_is_compatible(root, compat); - of_node_put(root); - } - return rc; -} -EXPORT_SYMBOL(machine_is_compatible); - /******* * * New implementation of the OF "find" APIs, return a refcounted @@ -1265,27 +746,6 @@ EXPORT_SYMBOL(machine_is_compatible); *******/ /** - * of_find_node_by_phandle - Find a node given a phandle - * @handle: phandle of the node to find - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_phandle(phandle handle) -{ - struct device_node *np; - - read_lock(&devtree_lock); - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == handle) - break; - of_node_get(np); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_phandle); - -/** * of_find_next_cache_node - Find a node's subsidiary cache * @np: node of type "cpu" or "cache" * @@ -1316,161 +776,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np) return NULL; } -/** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (of_node_get(np)) - break; - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kref_get(&node->kref); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node * kref_to_device_node(struct kref *kref) -{ - return container_of(kref, struct device_node, kref); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kref *kref) -{ - struct device_node *node = kref_to_device_node(kref); - struct property *prop = node->properties; - - /* We should never be releasing nodes that haven't been detached. */ - if (!of_node_check_flag(node, OF_DETACHED)) { - printk("WARNING: Bad of_node_put() on %s\n", node->full_name); - dump_stack(); - kref_init(&node->kref); - return; - } - - if (!of_node_check_flag(node, OF_DYNAMIC)) - return; - - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - - if (!prop) { - prop = node->deadprops; - node->deadprops = NULL; - } - } - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kref_put(&node->kref, of_node_release); -} -EXPORT_SYMBOL(of_node_put); - -/* - * Plug a device node into the tree and global list. - */ -void of_attach_node(struct device_node *np) -{ - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = allnodes; - np->parent->child = np; - allnodes = np; - write_unlock_irqrestore(&devtree_lock, flags); -} - -/* - * "Unplug" a node from the device tree. The caller must hold - * a reference to the node. The memory associated with the node - * is not freed until its refcount goes to zero. - */ -void of_detach_node(struct device_node *np) -{ - struct device_node *parent; - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - - parent = np->parent; - if (!parent) - goto out_unlock; - - if (allnodes == np) - allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } - - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - of_node_set_flag(np, OF_DETACHED); - -out_unlock: - write_unlock_irqrestore(&devtree_lock, flags); -} - #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: @@ -1502,9 +807,9 @@ static int of_finish_dynamic_node(struct device_node *node) if (machine_is(powermac)) return -ENODEV; - /* fix up new node's linux_phandle field */ + /* fix up new node's phandle field */ if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->linux_phandle = *ibm_phandle; + node->phandle = *ibm_phandle; out: of_node_put(parent); @@ -1543,120 +848,6 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* - * Add a property to a node - */ -int prom_add_property(struct device_node* np, struct property* prop) -{ - struct property **next; - unsigned long flags; - - prop->next = NULL; - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (strcmp(prop->name, (*next)->name) == 0) { - /* duplicate ! don't insert it */ - write_unlock_irqrestore(&devtree_lock, flags); - return -1; - } - next = &(*next)->next; - } - *next = prop; - write_unlock_irqrestore(&devtree_lock, flags); - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_add_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Remove a property from a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. Instead we just move the property - * to the "dead properties" list, so it won't be found any more. - */ -int prom_remove_property(struct device_node *np, struct property *prop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == prop) { - /* found the node */ - *next = prop->next; - prop->next = np->deadprops; - np->deadprops = prop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to remove the proc node as well */ - if (np->pde) - proc_device_tree_remove_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Update a property in a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. Instead we just move the property - * to the "dead properties" list, and add the new property to the - * property list - */ -int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == oldprop) { - /* found the node */ - newprop->next = oldprop->next; - *next = newprop; - oldprop->next = np->deadprops; - np->deadprops = oldprop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_update_prop(np->pde, newprop, oldprop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - - /* Find the device node for a given logical cpu number, also returns the cpu * local thread number (index in ibm,interrupt-server#s) if relevant and * asked for (non NULL) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bafac2e41ae1..5f306c4946e5 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -654,6 +654,9 @@ static void __init early_cmdline_parse(void) #define OV5_CMO 0x00 #endif +/* Option Vector 6: IBM PAPR hints */ +#define OV6_LINUX 0x02 /* Linux is our OS */ + /* * The architecture vector has an array of PVR mask/value pairs, * followed by # option vectors - 1, followed by the option vectors. @@ -665,7 +668,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - 5 - 1, /* 5 option vectors */ + 6 - 1, /* 6 option vectors */ /* option vector 1: processor architectures supported */ 3 - 2, /* length */ @@ -697,12 +700,29 @@ static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 5 - 2, /* length */ + 13 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, OV5_CMO, + 0, + 0, + 0, + 0, + /* WARNING: The offset of the "number of cores" field below + * must match by the macro below. Update the definition if + * the structure layout changes. + */ +#define IBM_ARCH_VEC_NRCORES_OFFSET 100 + W(NR_CPUS), /* number of cores supported */ + + /* option vector 6: IBM PAPR hints */ + 4 - 2, /* length */ + 0, + 0, + OV6_LINUX, + }; /* Old method - ELF header with PT_NOTE sections */ @@ -792,13 +812,70 @@ static struct fake_elf { } }; +static int __init prom_count_smt_threads(void) +{ + phandle node; + char type[64]; + unsigned int plen; + + /* Pick up th first CPU node we can find */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + + if (strcmp(type, RELOC("cpu"))) + continue; + /* + * There is an entry for each smt thread, each entry being + * 4 bytes long. All cpus should have the same number of + * smt threads, so return after finding the first. + */ + plen = prom_getproplen(node, "ibm,ppc-interrupt-server#s"); + if (plen == PROM_ERROR) + break; + plen >>= 2; + prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen); + + /* Sanity check */ + if (plen < 1 || plen > 64) { + prom_printf("Threads per core 0x%x out of bounds, assuming 1\n", + (unsigned long)plen); + return 1; + } + return plen; + } + prom_debug("No threads found, assuming 1 per core\n"); + + return 1; + +} + + static void __init prom_send_capabilities(void) { ihandle elfloader, root; prom_arg_t ret; + u32 *cores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { + /* We need to tell the FW about the number of cores we support. + * + * To do that, we count the number of threads on the first core + * (we assume this is the same for all cores) and use it to + * divide NR_CPUS. + */ + cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]); + if (*cores != NR_CPUS) { + prom_printf("WARNING ! " + "ibm_architecture_vec structure inconsistent: 0x%x !\n", + *cores); + } else { + *cores = NR_CPUS / prom_count_smt_threads(); + prom_printf("Max number of cores passed to firmware: 0x%x\n", + (unsigned long)*cores); + } + /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ef149880c145..ed2cfe17d25e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -46,7 +46,7 @@ /* * Set of msr bits that gdb can change on behalf of a process. */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_DEBUGCHANGE 0 #else #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) @@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_BT; task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; @@ -720,7 +720,7 @@ void user_enable_block_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_IC; task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; regs->msr |= MSR_DE; @@ -737,17 +737,25 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_BOOKE) - /* If DAC don't clear DBCRO_IDM or MSR_DE */ - if (task->thread.dabr) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT); - else { - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.dbcr0 &= ~DBCR0_IC; + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.dbcr0 &= ~DBCR0_IDM; regs->msr &= ~MSR_DE; } -#elif defined(CONFIG_40x) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); - regs->msr &= ~MSR_DE; #else regs->msr &= ~(MSR_SE | MSR_BE); #endif @@ -769,8 +777,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; -#ifndef CONFIG_BOOKE - +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. * It was assumed, on previous implementations, that 3 bits were * passed together with the data address, fitting the design of the @@ -789,21 +796,22 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Move contents to the DABR register */ task->thread.dabr = data; - -#endif -#if defined(CONFIG_BOOKE) - +#else /* CONFIG_PPC_ADV_DEBUG_REGS */ /* As described above, it was assumed 3 bits were passed with the data * address, but we will assume only the mode bits will be passed * as to not cause alignment restrictions for DAC-based processors. */ /* DAC's hold the whole address without any mode flags */ - task->thread.dabr = data & ~0x3UL; - - if (task->thread.dabr == 0) { - task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); - task->thread.regs->msr &= ~MSR_DE; + task->thread.dac1 = data & ~0x3UL; + + if (task->thread.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.dbcr0 &= ~DBCR0_IDM; + } return 0; } @@ -814,17 +822,17 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ - task->thread.dbcr0 = DBCR0_IDM; + task->thread.dbcr0 |= DBCR0_IDM; /* Check for write and read flags and set DBCR0 accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); if (data & 0x1UL) - task->thread.dbcr0 |= DBSR_DAC1R; + dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) - task->thread.dbcr0 |= DBSR_DAC1W; - + dbcr_dac(task) |= DBCR_DAC1W; task->thread.regs->msr |= MSR_DE; -#endif +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ return 0; } @@ -839,6 +847,394 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static long set_intruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + + /* Make sure range is valid. */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC regsisters */ + if ((!slot1_in_use) && (!slot2_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.iac2 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.iac4 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else + return -ENOSPC; + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || (slot3_in_use == slot4_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.iac2 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.iac4 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC4; +#endif + } else + return -ENOSPC; + } +out: + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.iac1 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.iac2 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.iac3 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.iac4 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && (condition_mode == 0)) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc1 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else + return -ENOSPC; + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + child->thread.dac2 = 0; + child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc2 = 0; +#endif + child->thread.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else + return -EINVAL; + + return 0; +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.dac1 = bp_info->addr; + child->thread.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +static long ppc_set_hwdebug(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + if (bp_info->version != 1) + return -ENOTSUPP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * Check for invalid flags and combinations + */ + if ((bp_info->trigger_type == 0) || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + return set_intruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ + /* + * We only support one data breakpoint + */ + if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) || + ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) || + (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) || + (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + + if (child->thread.dabr) + return -ENOSPC; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + child->thread.dabr = (unsigned long)bp_info->addr; + + return 1; +#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ +} + +static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, + child->thread.dbcr1)) { + child->thread.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +#else + if (data != 1) + return -EINVAL; + if (child->thread.dabr == 0) + return -ENOENT; + + child->thread.dabr = 0; + + return 0; +#endif +} + /* * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version @@ -932,13 +1328,77 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + dbginfo.version = 1; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo.data_bp_alignment = 4; + dbginfo.sizeof_condition = 4; + dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + dbginfo.features |= + PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + dbginfo.num_instruction_bps = 0; + dbginfo.num_data_bps = 1; + dbginfo.num_condition_regs = 0; +#ifdef CONFIG_PPC64 + dbginfo.data_bp_alignment = 8; +#else + dbginfo.data_bp_alignment = 4; +#endif + dbginfo.sizeof_condition = 0; + dbginfo.features = 0; +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + + if (!access_ok(VERIFY_WRITE, data, + sizeof(struct ppc_debug_info))) + return -EFAULT; + ret = __copy_to_user((struct ppc_debug_info __user *)data, + &dbginfo, sizeof(struct ppc_debug_info)) ? + -EFAULT : 0; + break; + } + + case PPC_PTRACE_SETHWDEBUG: { + struct ppc_hw_breakpoint bp_info; + + if (!access_ok(VERIFY_READ, data, + sizeof(struct ppc_hw_breakpoint))) + return -EFAULT; + ret = __copy_from_user(&bp_info, + (struct ppc_hw_breakpoint __user *)data, + sizeof(struct ppc_hw_breakpoint)) ? + -EFAULT : 0; + if (!ret) + ret = ppc_set_hwdebug(child, &bp_info); + break; + } + + case PPC_PTRACE_DELHWDEBUG: { + ret = ppc_del_hwdebug(child, addr, data); + break; + } + case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, + (unsigned long __user *)data); +#else ret = put_user(child->thread.dabr, (unsigned long __user *)data); +#endif break; } diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 1be9fe38bcb5..8777fb02349f 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -262,19 +262,19 @@ static int __init proc_rtas_init(void) if (rtas_node == NULL) return -ENODEV; - proc_create("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL, &ppc_rtas_progress_operations); - proc_create("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL, &ppc_rtas_clock_operations); - proc_create("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_poweron_operations); - proc_create("ppc64/rtas/sensors", S_IRUGO, NULL, + proc_create("powerpc/rtas/sensors", S_IRUGO, NULL, &ppc_rtas_sensors_operations); - proc_create("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_tone_freq_operations); - proc_create("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL, &ppc_rtas_tone_volume_operations); - proc_create("ppc64/rtas/rmo_buffer", S_IRUSR, NULL, + proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL, &ppc_rtas_rmo_buf_ops); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index bf90361bb70f..fd0d29493fd6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,7 +42,7 @@ #include <asm/mmu.h> struct rtas_t rtas = { - .lock = __RAW_SPIN_LOCK_UNLOCKED + .lock = __ARCH_SPIN_LOCK_UNLOCKED }; EXPORT_SYMBOL(rtas); @@ -80,13 +80,13 @@ static unsigned long lock_rtas(void) local_irq_save(flags); preempt_disable(); - __raw_spin_lock_flags(&rtas.lock, flags); + arch_spin_lock_flags(&rtas.lock, flags); return flags; } static void unlock_rtas(unsigned long flags) { - __raw_spin_unlock(&rtas.lock); + arch_spin_unlock(&rtas.lock); local_irq_restore(flags); preempt_enable(); } @@ -978,7 +978,7 @@ int __init early_init_dt_scan_rtas(unsigned long node, return 1; } -static raw_spinlock_t timebase_lock; +static arch_spinlock_t timebase_lock; static u64 timebase = 0; void __cpuinit rtas_give_timebase(void) @@ -987,10 +987,10 @@ void __cpuinit rtas_give_timebase(void) local_irq_save(flags); hard_irq_disable(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); timebase = get_tb(); - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); while (timebase) barrier(); @@ -1002,8 +1002,8 @@ void __cpuinit rtas_take_timebase(void) { while (!timebase) barrier(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 13011a96a977..a85117d5c9a4 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * /proc/ppc64/rtas/firmware_flash interface + * /proc/powerpc/rtas/firmware_flash interface * * This file implements a firmware_flash interface to pump a firmware * image into the kernel. At reboot time rtas_restart() will see the @@ -740,7 +740,7 @@ static int __init rtas_flash_init(void) return 1; } - firmware_flash_pde = create_flash_pde("ppc64/rtas/" + firmware_flash_pde = create_flash_pde("powerpc/rtas/" FIRMWARE_FLASH_NAME, &rtas_flash_operations); if (firmware_flash_pde == NULL) { @@ -754,7 +754,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - firmware_update_pde = create_flash_pde("ppc64/rtas/" + firmware_update_pde = create_flash_pde("powerpc/rtas/" FIRMWARE_UPDATE_NAME, &rtas_flash_operations); if (firmware_update_pde == NULL) { @@ -768,7 +768,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME, + validate_pde = create_flash_pde("powerpc/rtas/" VALIDATE_FLASH_NAME, &validate_flash_operations); if (validate_pde == NULL) { rc = -ENOMEM; @@ -781,7 +781,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME, + manage_pde = create_flash_pde("powerpc/rtas/" MANAGE_FLASH_NAME, &manage_flash_operations); if (manage_pde == NULL) { rc = -ENOMEM; diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c new file mode 100644 index 000000000000..2e4832ab2108 --- /dev/null +++ b/arch/powerpc/kernel/rtasd.c @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Communication to userspace based on kernel/printk.c + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> +#include <linux/workqueue.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/nvram.h> +#include <asm/atomic.h> +#include <asm/machdep.h> + + +static DEFINE_SPINLOCK(rtasd_log_lock); + +static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); + +static char *rtas_log_buf; +static unsigned long rtas_log_start; +static unsigned long rtas_log_size; + +static int surveillance_timeout = -1; + +static unsigned int rtas_error_log_max; +static unsigned int rtas_error_log_buffer_max; + +/* RTAS service tokens */ +static unsigned int event_scan; +static unsigned int rtas_event_scan_rate; + +static int full_rtas_msgs = 0; + +/* Stop logging to nvram after first fatal error */ +static int logging_enabled; /* Until we initialize everything, + * make sure we don't try logging + * anything */ +static int error_log_cnt; + +/* + * Since we use 32 bit RTAS, the physical address of this must be below + * 4G or else bad things happen. Allocate this in the kernel data and + * make it big enough. + */ +static unsigned char logdata[RTAS_ERROR_LOG_MAX]; + +static char *rtas_type[] = { + "Unknown", "Retry", "TCE Error", "Internal Device Failure", + "Timeout", "Data Parity", "Address Parity", "Cache Parity", + "Address Invalid", "ECC Uncorrected", "ECC Corrupted", +}; + +static char *rtas_event_type(int type) +{ + if ((type > 0) && (type < 11)) + return rtas_type[type]; + + switch (type) { + case RTAS_TYPE_EPOW: + return "EPOW"; + case RTAS_TYPE_PLATFORM: + return "Platform Error"; + case RTAS_TYPE_IO: + return "I/O Event"; + case RTAS_TYPE_INFO: + return "Platform Information Event"; + case RTAS_TYPE_DEALLOC: + return "Resource Deallocation Event"; + case RTAS_TYPE_DUMP: + return "Dump Notification Event"; + } + + return rtas_type[0]; +} + +/* To see this info, grep RTAS /var/log/messages and each entry + * will be collected together with obvious begin/end. + * There will be a unique identifier on the begin and end lines. + * This will persist across reboots. + * + * format of error logs returned from RTAS: + * bytes (size) : contents + * -------------------------------------------------------- + * 0-7 (8) : rtas_error_log + * 8-47 (40) : extended info + * 48-51 (4) : vendor id + * 52-1023 (vendor specific) : location code and debug data + */ +static void printk_log_rtas(char *buf, int len) +{ + + int i,j,n = 0; + int perline = 16; + char buffer[64]; + char * str = "RTAS event"; + + if (full_rtas_msgs) { + printk(RTAS_DEBUG "%d -------- %s begin --------\n", + error_log_cnt, str); + + /* + * Print perline bytes on each line, each line will start + * with RTAS and a changing number, so syslogd will + * print lines that are otherwise the same. Separate every + * 4 bytes with a space. + */ + for (i = 0; i < len; i++) { + j = i % perline; + if (j == 0) { + memset(buffer, 0, sizeof(buffer)); + n = sprintf(buffer, "RTAS %d:", i/perline); + } + + if ((i % 4) == 0) + n += sprintf(buffer+n, " "); + + n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]); + + if (j == (perline-1)) + printk(KERN_DEBUG "%s\n", buffer); + } + if ((i % perline) != 0) + printk(KERN_DEBUG "%s\n", buffer); + + printk(RTAS_DEBUG "%d -------- %s end ----------\n", + error_log_cnt, str); + } else { + struct rtas_error_log *errlog = (struct rtas_error_log *)buf; + + printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", + error_log_cnt, rtas_event_type(errlog->type), + errlog->severity); + } +} + +static int log_rtas_len(char * buf) +{ + int len; + struct rtas_error_log *err; + + /* rtas fixed header */ + len = 8; + err = (struct rtas_error_log *)buf; + if (err->extended_log_length) { + + /* extended header */ + len += err->extended_log_length; + } + + if (rtas_error_log_max == 0) + rtas_error_log_max = rtas_get_error_log_max(); + + if (len > rtas_error_log_max) + len = rtas_error_log_max; + + return len; +} + +/* + * First write to nvram, if fatal error, that is the only + * place we log the info. The error will be picked up + * on the next reboot by rtasd. If not fatal, run the + * method for the type of error. Currently, only RTAS + * errors have methods implemented, but in the future + * there might be a need to store data in nvram before a + * call to panic(). + * + * XXX We write to nvram periodically, to indicate error has + * been written and sync'd, but there is a possibility + * that if we don't shutdown correctly, a duplicate error + * record will be created on next reboot. + */ +void pSeries_log_error(char *buf, unsigned int err_type, int fatal) +{ + unsigned long offset; + unsigned long s; + int len = 0; + + pr_debug("rtasd: logging event\n"); + if (buf == NULL) + return; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* get length and increase count */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + len = log_rtas_len(buf); + if (!(err_type & ERR_FLAG_BOOT)) + error_log_cnt++; + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +#ifdef CONFIG_PPC64 + /* Write error to NVRAM */ + if (logging_enabled && !(err_type & ERR_FLAG_BOOT)) + nvram_write_error_log(buf, len, err_type, error_log_cnt); +#endif /* CONFIG_PPC64 */ + + /* + * rtas errors can occur during boot, and we do want to capture + * those somewhere, even if nvram isn't ready (why not?), and even + * if rtasd isn't ready. Put them into the boot log, at least. + */ + if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG) + printk_log_rtas(buf, len); + + /* Check to see if we need to or have stopped logging */ + if (fatal || !logging_enabled) { + logging_enabled = 0; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + + /* call type specific method for error */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + offset = rtas_error_log_buffer_max * + ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); + + /* First copy over sequence number */ + memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int)); + + /* Second copy over error log data */ + offset += sizeof(int); + memcpy(&rtas_log_buf[offset], buf, len); + + if (rtas_log_size < LOG_NUMBER) + rtas_log_size += 1; + else + rtas_log_start += 1; + + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + wake_up_interruptible(&rtas_log_wait); + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +} + +static int rtas_log_open(struct inode * inode, struct file * file) +{ + return 0; +} + +static int rtas_log_release(struct inode * inode, struct file * file) +{ + return 0; +} + +/* This will check if all events are logged, if they are then, we + * know that we can safely clear the events in NVRAM. + * Next we'll sit and wait for something else to log. + */ +static ssize_t rtas_log_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + int error; + char *tmp; + unsigned long s; + unsigned long offset; + + if (!buf || count < rtas_error_log_buffer_max) + return -EINVAL; + + count = rtas_error_log_buffer_max; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* if it's 0, then we know we got the last one (the one in NVRAM) */ + while (rtas_log_size == 0) { + if (file->f_flags & O_NONBLOCK) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -EAGAIN; + goto out; + } + + if (!logging_enabled) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -ENODATA; + goto out; + } +#ifdef CONFIG_PPC64 + nvram_clear_error_log(); +#endif /* CONFIG_PPC64 */ + + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = wait_event_interruptible(rtas_log_wait, rtas_log_size); + if (error) + goto out; + spin_lock_irqsave(&rtasd_log_lock, s); + } + + offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK); + memcpy(tmp, &rtas_log_buf[offset], count); + + rtas_log_start += 1; + rtas_log_size -= 1; + spin_unlock_irqrestore(&rtasd_log_lock, s); + + error = copy_to_user(buf, tmp, count) ? -EFAULT : count; +out: + kfree(tmp); + return error; +} + +static unsigned int rtas_log_poll(struct file *file, poll_table * wait) +{ + poll_wait(file, &rtas_log_wait, wait); + if (rtas_log_size) + return POLLIN | POLLRDNORM; + return 0; +} + +static const struct file_operations proc_rtas_log_operations = { + .read = rtas_log_read, + .poll = rtas_log_poll, + .open = rtas_log_open, + .release = rtas_log_release, +}; + +static int enable_surveillance(int timeout) +{ + int error; + + error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout); + + if (error == 0) + return 0; + + if (error == -EINVAL) { + printk(KERN_DEBUG "rtasd: surveillance not supported\n"); + return 0; + } + + printk(KERN_ERR "rtasd: could not update surveillance\n"); + return -1; +} + +static void do_event_scan(void) +{ + int error; + do { + memset(logdata, 0, rtas_error_log_max); + error = rtas_call(event_scan, 4, 1, NULL, + RTAS_EVENT_SCAN_ALL_EVENTS, 0, + __pa(logdata), rtas_error_log_max); + if (error == -1) { + printk(KERN_ERR "event-scan failed\n"); + break; + } + + if (error == 0) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + + } while(error == 0); +} + +static void rtas_event_scan(struct work_struct *w); +DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan); + +/* + * Delay should be at least one second since some machines have problems if + * we call event-scan too quickly. + */ +static unsigned long event_scan_delay = 1*HZ; +static int first_pass = 1; + +static void rtas_event_scan(struct work_struct *w) +{ + unsigned int cpu; + + do_event_scan(); + + get_online_cpus(); + + cpu = next_cpu(smp_processor_id(), cpu_online_map); + if (cpu == NR_CPUS) { + cpu = first_cpu(cpu_online_map); + + if (first_pass) { + first_pass = 0; + event_scan_delay = 30*HZ/rtas_event_scan_rate; + + if (surveillance_timeout != -1) { + pr_debug("rtasd: enabling surveillance\n"); + enable_surveillance(surveillance_timeout); + pr_debug("rtasd: surveillance enabled\n"); + } + } + } + + schedule_delayed_work_on(cpu, &event_scan_work, + __round_jiffies_relative(event_scan_delay, cpu)); + + put_online_cpus(); +} + +#ifdef CONFIG_PPC64 +static void retreive_nvram_error_log(void) +{ + unsigned int err_type ; + int rc ; + + /* See if we have any error stored in NVRAM */ + memset(logdata, 0, rtas_error_log_max); + rc = nvram_read_error_log(logdata, rtas_error_log_max, + &err_type, &error_log_cnt); + /* We can use rtas_log_buf now */ + logging_enabled = 1; + if (!rc) { + if (err_type != ERR_FLAG_ALREADY_LOGGED) { + pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0); + } + } +} +#else /* CONFIG_PPC64 */ +static void retreive_nvram_error_log(void) +{ +} +#endif /* CONFIG_PPC64 */ + +static void start_event_scan(void) +{ + printk(KERN_DEBUG "RTAS daemon started\n"); + pr_debug("rtasd: will sleep for %d milliseconds\n", + (30000 / rtas_event_scan_rate)); + + /* Retreive errors from nvram if any */ + retreive_nvram_error_log(); + + schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, + event_scan_delay); +} + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + /* No RTAS */ + event_scan = rtas_token("event-scan"); + if (event_scan == RTAS_UNKNOWN_SERVICE) { + printk(KERN_INFO "rtasd: No event-scan on system\n"); + return -ENODEV; + } + + rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); + if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); + return -ENODEV; + } + + /* Make room for the sequence number */ + rtas_error_log_max = rtas_get_error_log_max(); + rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); + + rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER); + if (!rtas_log_buf) { + printk(KERN_ERR "rtasd: no memory\n"); + return -ENOMEM; + } + + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, + &proc_rtas_log_operations); + if (!entry) + printk(KERN_ERR "Failed to create error_log proc entry\n"); + + start_event_scan(); + + return 0; +} +__initcall(rtas_init); + +static int __init surveillance_setup(char *str) +{ + int i; + + /* We only do surveillance on pseries */ + if (!machine_is(pseries)) + return 0; + + if (get_option(&str,&i)) { + if (i >= 0 && i <= 255) + surveillance_timeout = i; + } + + return 1; +} +__setup("surveillance=", surveillance_setup); + +static int __init rtasmsgs_setup(char *str) +{ + if (strcmp(str, "on") == 0) + full_rtas_msgs = 1; + else if (strcmp(str, "off") == 0) + full_rtas_msgs = 0; + + return 1; +} +__setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4271f7a655a3..48f0a008b20b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -36,6 +36,7 @@ #include <linux/lmb.h> #include <linux/of_platform.h> #include <asm/io.h> +#include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> @@ -157,7 +158,7 @@ extern u32 cpu_temp_both(unsigned long cpu); #endif /* CONFIG_TAU */ #ifdef CONFIG_SMP -DEFINE_PER_CPU(unsigned int, pvr); +DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif static int show_cpuinfo(struct seq_file *m, void *v) @@ -209,7 +210,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) } #ifdef CONFIG_SMP - pvr = per_cpu(pvr, cpu_id); + pvr = per_cpu(cpu_pvr, cpu_id); #else pvr = mfspr(SPRN_PVR); #endif @@ -493,6 +494,8 @@ void __init smp_setup_cpu_maps(void) * here will have to be reworked */ cpu_init_thread_core_maps(nthreads); + + free_unused_pacas(); } #endif /* CONFIG_SMP */ @@ -660,6 +663,7 @@ late_initcall(check_cache_coherency); #ifdef CONFIG_DEBUG_FS struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d792db..b152de3e64d4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 797ea95aae2e..63547394048c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -57,7 +57,6 @@ #include <asm/cache.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/mmu-hash64.h> #include <asm/firmware.h> #include <asm/xmon.h> #include <asm/udbg.h> @@ -145,9 +144,9 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +static void __init setup_paca(struct paca_struct *new_paca) { - local_paca = &paca[cpu]; + local_paca = new_paca; mtspr(SPRN_SPRG_PACA, local_paca); #ifdef CONFIG_PPC_BOOK3E mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); @@ -177,14 +176,12 @@ void __init early_setup(unsigned long dt_ptr) { /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Fill in any unititialised pacas */ - initialise_pacas(); - /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -204,7 +201,7 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + setup_paca(&paca[boot_cpuid]); /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; @@ -357,11 +354,6 @@ void __init setup_system(void) */ initialize_cache_info(); - /* - * Initialize irq remapping subsystem - */ - irq_early_init(); - #ifdef CONFIG_PPC_RTAS /* * Initialize RTAS if available @@ -551,7 +543,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 00b5078da9a3..a0afb555a7c9 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -140,17 +140,15 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) return 0; /* no signals delivered */ } +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) { + if (current->thread.dabr) set_dabr(current->thread.dabr); -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, current->thread.dbcr0); #endif - } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d670429a1608..266610119f66 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1078,7 +1078,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, int i; unsigned char tmp; unsigned long new_msr = regs->msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.dbcr0; #endif @@ -1087,13 +1087,17 @@ int sys_debug_setcontext(struct ucontext __user *ctx, return -EFAULT; switch (op.dbg_type) { case SIG_DBG_SINGLE_STEPPING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS if (op.dbg_value) { new_msr |= MSR_DE; new_dbcr0 |= (DBCR0_IDM | DBCR0_IC); } else { - new_msr &= ~MSR_DE; - new_dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); + new_dbcr0 &= ~DBCR0_IC; + if (!DBCR_ACTIVE_EVENTS(new_dbcr0, + current->thread.dbcr1)) { + new_msr &= ~MSR_DE; + new_dbcr0 &= ~DBCR0_IDM; + } } #else if (op.dbg_value) @@ -1103,7 +1107,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, #endif break; case SIG_DBG_BRANCH_TRACING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS return -EINVAL; #else if (op.dbg_value) @@ -1124,7 +1128,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. */ regs->msr = new_msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9b86a74d2815..c2ee14498077 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -218,6 +218,9 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) static void stop_this_cpu(void *dummy) { + /* Remove this CPU */ + set_cpu_online(smp_processor_id(), false); + local_irq_disable(); while (1) ; @@ -232,7 +235,7 @@ struct thread_info *current_set[NR_CPUS]; static void __devinit smp_store_cpu_info(int id) { - per_cpu(pvr, id) = mfspr(SPRN_PVR); + per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); } static void __init smp_create_idle(unsigned int cpu) @@ -616,4 +619,16 @@ void __cpu_die(unsigned int cpu) if (smp_ops->cpu_die) smp_ops->cpu_die(cpu); } + +static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); + +void cpu_hotplug_driver_lock() +{ + mutex_lock(&powerpc_cpu_hotplug_driver_mutex); +} + +void cpu_hotplug_driver_unlock() +{ + mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); +} #endif diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index b47d8ceffb52..b0754e237438 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -303,7 +303,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r4,0x1000 1: addic. r4,r4,-0x1000 tlbie r4 - blt 1b + bgt 1b sync /* restore the MSR and turn on the MMU */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b97c2d67f4ac..c5a4732bcc48 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -520,58 +520,6 @@ asmlinkage long compat_sys_umask(u32 mask) return sys_umask((int)mask); } -#ifdef CONFIG_SYSCTL_SYSCALL -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long compat_sys_sysctl(struct __sysctl_args32 __user *args) -{ - struct __sysctl_args32 tmp; - int error; - size_t oldlen; - size_t __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - oldlenp = (size_t __user *)addr; - if (get_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) || - put_user(oldlen, oldlenp)) - return -EFAULT; - } - - lock_kernel(); - error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, - compat_ptr(tmp.oldval), oldlenp, - compat_ptr(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, oldlenp) || - put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp))) - error = -EFAULT; - } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); - } - return error; -} -#endif - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index c04832c4a02e..f2496f2faecc 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,105 +42,10 @@ #include <asm/time.h> #include <asm/unistd.h> -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? -EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - struct file * file = NULL; unsigned long ret = -EINVAL; if (!arch_validate_prot(prot)) @@ -151,20 +56,8 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len, goto out; off >>= shift; } - - ret = -EBADF; - if (!(flags & MAP_ANONYMOUS)) { - if (!(file = fget(fd))) - goto out; - } - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flags, off); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); + ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off); out: return ret; } @@ -223,76 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -long ppc_newuname(struct new_utsname __user * name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? -EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 956ab33fd73f..e235e52dc4fe 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -461,6 +461,25 @@ static void unregister_cpu_online(unsigned int cpu) cacheinfo_cpu_offline(cpu); } + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE +ssize_t arch_cpu_probe(const char *buf, size_t count) +{ + if (ppc_md.cpu_probe) + return ppc_md.cpu_probe(buf, count); + + return -EINVAL; +} + +ssize_t arch_cpu_release(const char *buf, size_t count) +{ + if (ppc_md.cpu_release) + return ppc_md.cpu_release(buf, count); + + return -EINVAL; +} +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + #endif /* CONFIG_HOTPLUG_CPU */ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index c3a56d65c5a9..a753b72efbc0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -59,7 +59,7 @@ void set_thresholds(unsigned long cpu) mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshhold + * threshold, valid bit, enable interrupts, interrupt when above threshold */ mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); #else diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 92dc844299b6..1b16b9a3e49a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -54,6 +54,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/perf_event.h> +#include <asm/trace.h> #include <asm/io.h> #include <asm/processor.h> @@ -264,10 +265,11 @@ void account_system_vtime(struct task_struct *tsk) account_system_time(tsk, 0, delta, deltascaled); else account_idle_time(delta); - per_cpu(cputime_last_delta, smp_processor_id()) = delta; - per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; + __get_cpu_var(cputime_last_delta) = delta; + __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(account_system_vtime); /* * Transfer the user and system times accumulated in the paca @@ -571,6 +573,10 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + trace_timer_interrupt_entry(regs); + + __get_cpu_var(irq_stat).timer_irqs++; + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -590,6 +596,7 @@ void timer_interrupt(struct pt_regs * regs) now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + trace_timer_interrupt_exit(regs); return; } old_regs = set_irq_regs(regs); @@ -620,6 +627,8 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); } void wakeup_decrementer(void) @@ -777,7 +786,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -void read_persistent_clock(struct timespec *ts) +static void __read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -800,10 +809,23 @@ void read_persistent_clock(struct timespec *ts) return; } ppc_md.get_rtc_time(&tm); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); } +void read_persistent_clock(struct timespec *ts) +{ + __read_persistent_clock(ts); + + /* Sanitize it in case real time clock is set below EPOCH */ + if (ts->tv_sec < 0) { + ts->tv_sec = 0; + ts->tv_nsec = 0; + } + +} + /* clocksource code */ static cycle_t rtc_read(struct clocksource *cs) { @@ -815,7 +837,8 @@ static cycle_t timebase_read(struct clocksource *cs) return (cycle_t)get_tb(); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { u64 t2x, stamp_xsec; @@ -828,7 +851,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) /* XXX this assumes clock->shift == 22 */ /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - t2x = (u64) clock->mult * 4611686018ULL; + t2x = (u64) mult * 4611686018ULL; stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; do_div(stamp_xsec, 1000000000); stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; @@ -882,12 +905,21 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } +static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, + int shift) +{ + uint64_t tmp = ((uint64_t)ticks) << shift; + + do_div(tmp, nsec); + return tmp; +} + static void __init setup_clockevent_multiplier(unsigned long hz) { u64 mult, shift = 32; while (1) { - mult = div_sc(hz, NSEC_PER_SEC, shift); + mult = div_sc64(hz, NSEC_PER_SEC, shift); if (mult && (mult >> 32UL) == 0UL) break; @@ -905,8 +937,8 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); - printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", - dec->name, dec->mult, dec->shift, cpu); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", + dec->name, dec->mult, dec->shift, cpu); clockevents_register_device(dec); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a9bfae..696626a2e835 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -60,13 +60,13 @@ #endif #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); +int (*__debugger)(struct pt_regs *regs) __read_mostly; +int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; +int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; +int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly; +int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly; EXPORT_SYMBOL(__debugger); EXPORT_SYMBOL(__debugger_ipi); @@ -102,11 +102,11 @@ static inline void pmac_backlight_unblank(void) { } int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -120,7 +120,7 @@ int die(const char *str, struct pt_regs *regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); @@ -146,6 +146,11 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, 255, + SIGSEGV) == NOTIFY_STOP) + return 1; + print_modules(); show_regs(regs); } else { @@ -155,7 +160,7 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + raw_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || kexec_sr_activated(smp_processor_id())) @@ -174,6 +179,15 @@ int die(const char *str, struct pt_regs *regs, long err) return 0; } +void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, siginfo_t *info) +{ + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; + info->si_code = TRAP_TRACE; + info->si_addr = (void __user *)regs->nip; +} + void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) { siginfo_t info; @@ -198,28 +212,6 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) info.si_code = code; info.si_addr = (void __user *) addr; force_sig_info(signr, &info, current); - - /* - * Init gets no signals that it doesn't have a handler for. - * That's all very well, but if it has caused a synchronous - * exception and we ignore the resulting signal, it will just - * generate the same exception over and over again and we get - * nowhere. Better to kill it and let the kernel panic. - */ - if (is_global_init(current)) { - __sighandler_t handler; - - spin_lock_irq(¤t->sighand->siglock); - handler = current->sighand->action[signr-1].sa.sa_handler; - spin_unlock_irq(¤t->sighand->siglock); - if (handler == SIG_DFL) { - /* init has generated a synchronous exception - and it doesn't have a handler for the signal */ - printk(KERN_CRIT "init has generated signal %d " - "but has no handler for it\n", signr); - do_exit(signr); - } - } } #ifdef CONFIG_PPC64 @@ -307,7 +299,7 @@ static inline int check_io_access(struct pt_regs *regs) return 0; } -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) @@ -491,6 +483,8 @@ void machine_check_exception(struct pt_regs *regs) { int recover = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code @@ -759,7 +753,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate the mfspr rD, PVR. */ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { - PPC_WARN_EMULATED(mfpvr); + PPC_WARN_EMULATED(mfpvr, regs); rd = (instword >> 21) & 0x1f; regs->gpr[rd] = mfspr(SPRN_PVR); return 0; @@ -767,7 +761,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulating the dcba insn is just a no-op. */ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { - PPC_WARN_EMULATED(dcba); + PPC_WARN_EMULATED(dcba, regs); return 0; } @@ -776,7 +770,7 @@ static int emulate_instruction(struct pt_regs *regs) int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; - PPC_WARN_EMULATED(mcrxr); + PPC_WARN_EMULATED(mcrxr, regs); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; @@ -784,19 +778,19 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { - PPC_WARN_EMULATED(string); + PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } /* Emulate the popcntb (Population Count Bytes) instruction. */ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { - PPC_WARN_EMULATED(popcntb); + PPC_WARN_EMULATED(popcntb, regs); return emulate_popcntb_inst(regs, instword); } /* Emulate isel (Integer Select) instruction */ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { - PPC_WARN_EMULATED(isel); + PPC_WARN_EMULATED(isel, regs); return emulate_isel(regs, instword); } @@ -973,6 +967,8 @@ void vsx_unavailable_exception(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { + __get_cpu_var(irq_stat).pmu_irqs++; + perf_irq(regs); } @@ -995,7 +991,7 @@ void SoftwareEmulation(struct pt_regs *regs) #ifdef CONFIG_MATH_EMULATION errcode = do_mathemu(regs); if (errcode >= 0) - PPC_WARN_EMULATED(math); + PPC_WARN_EMULATED(math, regs); switch (errcode) { case 0: @@ -1018,7 +1014,7 @@ void SoftwareEmulation(struct pt_regs *regs) #elif defined(CONFIG_8XX_MINIMAL_FPEMU) errcode = Soft_emulate_8xx(regs); if (errcode >= 0) - PPC_WARN_EMULATED(8xx); + PPC_WARN_EMULATED(8xx, regs); switch (errcode) { case 0: @@ -1037,10 +1033,69 @@ void SoftwareEmulation(struct pt_regs *regs) } #endif /* CONFIG_8xx */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static void handle_debug(struct pt_regs *regs, unsigned long debug_status) +{ + int changed = 0; + /* + * Determine the cause of the debug event, clear the + * event flags and send a trap to the handler. Torez + */ + if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + current->thread.dbcr2 &= ~DBCR2_DAC12MODE; +#endif + do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, + 5); + changed |= 0x01; + } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) { + dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W); + do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT, + 6); + changed |= 0x01; + } else if (debug_status & DBSR_IAC1) { + current->thread.dbcr0 &= ~DBCR0_IAC1; + dbcr_iac_range(current) &= ~DBCR_IAC12MODE; + do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, + 1); + changed |= 0x01; + } else if (debug_status & DBSR_IAC2) { + current->thread.dbcr0 &= ~DBCR0_IAC2; + do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, + 2); + changed |= 0x01; + } else if (debug_status & DBSR_IAC3) { + current->thread.dbcr0 &= ~DBCR0_IAC3; + dbcr_iac_range(current) &= ~DBCR_IAC34MODE; + do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, + 3); + changed |= 0x01; + } else if (debug_status & DBSR_IAC4) { + current->thread.dbcr0 &= ~DBCR0_IAC4; + do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, + 4); + changed |= 0x01; + } + /* + * At the point this routine was called, the MSR(DE) was turned off. + * Check all other debug flags and see if that bit needs to be turned + * back on or not. + */ + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM flag is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; + + if (changed & 0x01) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +} void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { + current->thread.dbsr = debug_status; + /* Hack alert: On BookE, Branch Taken stops on the branch itself, while * on server, it stops on the target of the branch. In order to simulate * the server behaviour, we thus restart right away with a single step @@ -1084,29 +1139,23 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) if (debugger_sstep(regs)) return; - if (user_mode(regs)) - current->thread.dbcr0 &= ~(DBCR0_IC); - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { - regs->msr &= ~MSR_DE; - if (user_mode(regs)) { - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | - DBCR0_IDM); - } else { - /* Disable DAC interupts */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | - DBSR_DAC1W | DBCR0_IDM)); - - /* Clear the DAC event */ - mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); + current->thread.dbcr0 &= ~DBCR0_IC; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, + current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM bit is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; +#endif } - /* Setup and send the trap to the handler */ - do_dabr(regs, mfspr(SPRN_DAC1), debug_status); - } + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else + handle_debug(regs, debug_status); } -#endif /* CONFIG_4xx || CONFIG_BOOKE */ +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #if !defined(CONFIG_TAU_INT) void TAUException(struct pt_regs *regs) @@ -1129,7 +1178,7 @@ void altivec_assist_exception(struct pt_regs *regs) flush_altivec_to_thread(current); - PPC_WARN_EMULATED(altivec); + PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index fc9af47e2128..e39cad83c884 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -60,6 +60,8 @@ void __init udbg_early_init(void) udbg_init_40x_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_CPM) udbg_init_cpm(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) + udbg_init_usbgecko(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 94e2df3cae07..d84d19224a95 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -50,6 +50,9 @@ /* Max supported size for symbol names */ #define MAX_SYMNAME 64 +/* The alignment of the vDSO */ +#define VDSO_ALIGNMENT (1 << 16) + extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; static unsigned int vdso32_pages; @@ -231,15 +234,21 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * pick a base address for the vDSO in process space. We try to put it * at vdso_base which is the "natural" base for it, but we might fail * and end up putting it elsewhere. + * Add enough to the size so that the result can be aligned. */ down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_pages << PAGE_SHIFT, 0, 0); + (vdso_pages << PAGE_SHIFT) + + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + 0, 0); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; goto fail_mmapsem; } + /* Add required alignment. */ + vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); + /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code @@ -712,7 +721,7 @@ static int __init vdso_init(void) #ifdef CONFIG_PPC64 /* - * Fill up the "systemcfg" stuff for backward compatiblity + * Fill up the "systemcfg" stuff for backward compatibility */ strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); vdso_data->version.major = SYSTEMCFG_MAJOR; diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 904ef1360dd7..0546bcd49cd0 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -25,7 +25,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - } + } :text PROVIDE(__etext = .); PROVIDE(_etext = .); PROVIDE(etext = .); @@ -56,7 +56,7 @@ SECTIONS .fixup : { *(.fixup) } .dynamic : { *(.dynamic) } :text :dynamic - .got : { *(.got) } + .got : { *(.got) } :text .plt : { *(.plt) } _end = .; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 67b6916f0e94..fe460482fa68 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -58,7 +58,7 @@ _GLOBAL(load_up_altivec) * all 1's */ mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 + cmpwi 0,r4,0 bne+ 1f li r4,-1 mtspr SPRN_VRSAVE,r4 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f56429362a12..dcd01c82e701 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,6 +38,9 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { + . = 0; + reloc_start = .; + . = KERNELBASE; /* @@ -236,6 +239,7 @@ SECTIONS READ_MOSTLY_DATA(L1_CACHE_BYTES) } + . = ALIGN(PAGE_SIZE); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { NOSAVE_DATA } |