summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-ocxl35
-rw-r--r--Documentation/accelerators/ocxl.rst160
-rw-r--r--Documentation/devicetree/booting-without-of.txt2
-rw-r--r--Documentation/filesystems/dax.txt1
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--MAINTAINERS12
-rw-r--r--arch/powerpc/Kconfig19
-rw-r--r--arch/powerpc/Kconfig.debug6
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/boot/Makefile8
-rw-r--r--arch/powerpc/boot/dts/a3m071.dts10
-rw-r--r--arch/powerpc/boot/dts/akebono.dts4
-rw-r--r--arch/powerpc/boot/dts/c2k.dts6
-rw-r--r--arch/powerpc/boot/dts/currituck.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8568mds.dts12
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8569mds.dts20
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021mds.dts6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025twr.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042d4rdb.dts10
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xrdb.dtsi6
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts6
-rw-r--r--arch/powerpc/boot/dts/gamecube.dts14
-rw-r--r--arch/powerpc/boot/dts/haleakala.dts2
-rw-r--r--arch/powerpc/boot/dts/kilauea.dts4
-rw-r--r--arch/powerpc/boot/dts/kmeter1.dts10
-rw-r--r--arch/powerpc/boot/dts/makalu.dts4
-rw-r--r--arch/powerpc/boot/dts/mpc832x_mds.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc832x_rdb.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc836x_mds.dts8
-rw-r--r--arch/powerpc/boot/dts/sbc8548-altflash.dts8
-rw-r--r--arch/powerpc/boot/dts/sbc8548.dts8
-rw-r--r--arch/powerpc/boot/dts/wii.dts32
-rw-r--r--arch/powerpc/boot/serial.c6
-rw-r--r--arch/powerpc/configs/mpc866_ads_defconfig1
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h20
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h71
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h79
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h7
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h38
-rw-r--r--arch/powerpc/include/asm/bug.h4
-rw-r--r--arch/powerpc/include/asm/code-patching.h2
-rw-r--r--arch/powerpc/include/asm/cpm.h2
-rw-r--r--arch/powerpc/include/asm/cpm1.h2
-rw-r--r--arch/powerpc/include/asm/cputable.h21
-rw-r--r--arch/powerpc/include/asm/drmem.h102
-rw-r--r--arch/powerpc/include/asm/eeh.h2
-rw-r--r--arch/powerpc/include/asm/exception-64s.h103
-rw-r--r--arch/powerpc/include/asm/firmware.h5
-rw-r--r--arch/powerpc/include/asm/hardirq.h1
-rw-r--r--arch/powerpc/include/asm/head-64.h47
-rw-r--r--arch/powerpc/include/asm/hmi.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h3
-rw-r--r--arch/powerpc/include/asm/hw_irq.h161
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h9
-rw-r--r--arch/powerpc/include/asm/irqflags.h14
-rw-r--r--arch/powerpc/include/asm/kexec.h2
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/local.h200
-rw-r--r--arch/powerpc/include/asm/machdep.h8
-rw-r--r--arch/powerpc/include/asm/mman.h13
-rw-r--r--arch/powerpc/include/asm/mmu-8xx.h60
-rw-r--r--arch/powerpc/include/asm/mmu.h9
-rw-r--r--arch/powerpc/include/asm/mmu_context.h22
-rw-r--r--arch/powerpc/include/asm/mpic_timer.h8
-rw-r--r--arch/powerpc/include/asm/nmi.h4
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h3
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h25
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h27
-rw-r--r--arch/powerpc/include/asm/nohash/pte-book3e.h1
-rw-r--r--arch/powerpc/include/asm/opal-api.h5
-rw-r--r--arch/powerpc/include/asm/opal.h6
-rw-r--r--arch/powerpc/include/asm/paca.h5
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h9
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pkeys.h218
-rw-r--r--arch/powerpc/include/asm/pnv-ocxl.h36
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/processor.h5
-rw-r--r--arch/powerpc/include/asm/prom.h27
-rw-r--r--arch/powerpc/include/asm/pte-common.h37
-rw-r--r--arch/powerpc/include/asm/reg.h6
-rw-r--r--arch/powerpc/include/asm/reg_8xx.h82
-rw-r--r--arch/powerpc/include/asm/systbl.h3
-rw-r--r--arch/powerpc/include/asm/unistd.h6
-rw-r--r--arch/powerpc/include/asm/xive-regs.h35
-rw-r--r--arch/powerpc/include/asm/xive.h40
-rw-r--r--arch/powerpc/include/uapi/asm/elf.h1
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h6
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h3
-rw-r--r--arch/powerpc/kernel/asm-offsets.c7
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S50
-rw-r--r--arch/powerpc/kernel/cputable.c15
-rw-r--r--arch/powerpc/kernel/crash.c16
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c30
-rw-r--r--arch/powerpc/kernel/eeh.c59
-rw-r--r--arch/powerpc/kernel/eeh_driver.c10
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c64
-rw-r--r--arch/powerpc/kernel/entry_32.S10
-rw-r--r--arch/powerpc/kernel/entry_64.S67
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S20
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S118
-rw-r--r--arch/powerpc/kernel/head_64.S11
-rw-r--r--arch/powerpc/kernel/head_8xx.S275
-rw-r--r--arch/powerpc/kernel/idle_book3e.S5
-rw-r--r--arch/powerpc/kernel/idle_power4.S5
-rw-r--r--arch/powerpc/kernel/irq.c29
-rw-r--r--arch/powerpc/kernel/mce.c142
-rw-r--r--arch/powerpc/kernel/mce_power.c115
-rw-r--r--arch/powerpc/kernel/module.lds8
-rw-r--r--arch/powerpc/kernel/module_64.c35
-rw-r--r--arch/powerpc/kernel/optprobes_head.S2
-rw-r--r--arch/powerpc/kernel/paca.c13
-rw-r--r--arch/powerpc/kernel/pci-common.c27
-rw-r--r--arch/powerpc/kernel/pci_dn.c6
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c2
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c2
-rw-r--r--arch/powerpc/kernel/process.c28
-rw-r--r--arch/powerpc/kernel/prom.c115
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/ptrace.c78
-rw-r--r--arch/powerpc/kernel/rtas-proc.c14
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c21
-rw-r--r--arch/powerpc/kernel/setup.h4
-rw-r--r--arch/powerpc/kernel/setup_64.c48
-rw-r--r--arch/powerpc/kernel/signal_32.c8
-rw-r--r--arch/powerpc/kernel/signal_64.c11
-rw-r--r--arch/powerpc/kernel/smp.c18
-rw-r--r--arch/powerpc/kernel/sysfs.c8
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/traps.c51
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S67
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kernel/watchdog.c100
-rw-r--r--arch/powerpc/kvm/book3s_hv.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c14
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S11
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/lib/code-patching.c37
-rw-r--r--arch/powerpc/lib/feature-fixups.c8
-rw-r--r--arch/powerpc/mm/8xx_mmu.c4
-rw-r--r--arch/powerpc/mm/Makefile3
-rw-r--r--arch/powerpc/mm/drmem.c439
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c32
-rw-r--r--arch/powerpc/mm/fault.c53
-rw-r--r--arch/powerpc/mm/hash64_4k.c14
-rw-r--r--arch/powerpc/mm/hash64_64k.c123
-rw-r--r--arch/powerpc/mm/hash_native_64.c97
-rw-r--r--arch/powerpc/mm/hash_utils_64.c97
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c16
-rw-r--r--arch/powerpc/mm/hugetlbpage.c8
-rw-r--r--arch/powerpc/mm/init_64.c5
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c2
-rw-r--r--arch/powerpc/mm/numa.c333
-rw-r--r--arch/powerpc/mm/pgtable-radix.c23
-rw-r--r--arch/powerpc/mm/pgtable.c3
-rw-r--r--arch/powerpc/mm/pgtable_32.c9
-rw-r--r--arch/powerpc/mm/pgtable_64.c14
-rw-r--r--arch/powerpc/mm/pkeys.c468
-rw-r--r--arch/powerpc/mm/subpage-prot.c3
-rw-r--r--arch/powerpc/mm/tlb-radix.c68
-rw-r--r--arch/powerpc/mm/tlb_nohash.c5
-rw-r--r--arch/powerpc/perf/8xx-pmu.c52
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/perf/imc-pmu.c102
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c259
-rw-r--r--arch/powerpc/platforms/44x/fsp2.h272
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c4
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c52
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c2
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c7
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c2
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig12
-rw-r--r--arch/powerpc/platforms/Kconfig11
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype8
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c3
-rw-r--r--arch/powerpc/platforms/cell/setup.c3
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c3
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c4
-rw-r--r--arch/powerpc/platforms/powermac/backlight.c6
-rw-r--r--arch/powerpc/platforms/powermac/feature.c3
-rw-r--r--arch/powerpc/platforms/powermac/pic.c8
-rw-r--r--arch/powerpc/platforms/powermac/smp.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c101
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c7
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c515
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c77
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S3
-rw-r--r--arch/powerpc/platforms/powernv/opal.c28
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c88
-rw-r--r--arch/powerpc/platforms/powernv/pci.c4
-rw-r--r--arch/powerpc/platforms/powernv/pci.h8
-rw-r--r--arch/powerpc/platforms/powernv/smp.c28
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c12
-rw-r--r--arch/powerpc/platforms/ps3/mm.c4
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c2
-rw-r--r--arch/powerpc/platforms/ps3/setup.c1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c16
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c162
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c3
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c522
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c8
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c8
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c2
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c60
-rw-r--r--arch/powerpc/platforms/pseries/pci.c177
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c126
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c2
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c176
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c3
-rw-r--r--arch/powerpc/sysdev/Makefile4
-rw-r--r--arch/powerpc/sysdev/axonram.c383
-rw-r--r--arch/powerpc/sysdev/cpm1.c33
-rw-r--r--arch/powerpc/sysdev/cpm2.c11
-rw-r--r--arch/powerpc/sysdev/cpm_common.c5
-rw-r--r--arch/powerpc/sysdev/cpm_gpio.c80
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c16
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c14
-rw-r--r--arch/powerpc/sysdev/mpic.c10
-rw-r--r--arch/powerpc/sysdev/mpic_timer.c55
-rw-r--r--arch/powerpc/sysdev/mv64x60_pci.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c10
-rw-r--r--arch/powerpc/sysdev/xics/ics-opal.c4
-rw-r--r--arch/powerpc/sysdev/xics/ics-rtas.c4
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c8
-rw-r--r--arch/powerpc/sysdev/xive/common.c8
-rw-r--r--arch/powerpc/xmon/ppc-dis.c4
-rw-r--r--arch/powerpc/xmon/xmon.c6
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c2
-rw-r--r--drivers/cpuidle/cpuidle-pseries.c16
-rw-r--r--drivers/macintosh/adb.c24
-rw-r--r--drivers/macintosh/adbhid.c55
-rw-r--r--drivers/macintosh/ams/ams-input.c4
-rw-r--r--drivers/macintosh/therm_adt746x.c4
-rw-r--r--drivers/macintosh/via-pmu-backlight.c2
-rw-r--r--drivers/macintosh/windfarm_pm112.c8
-rw-r--r--drivers/macintosh/windfarm_pm121.c5
-rw-r--r--drivers/macintosh/windfarm_pm72.c2
-rw-r--r--drivers/macintosh/windfarm_pm81.c5
-rw-r--r--drivers/macintosh/windfarm_pm91.c5
-rw-r--r--drivers/macintosh/windfarm_rm31.c2
-rw-r--r--drivers/misc/Kconfig1
-rw-r--r--drivers/misc/Makefile3
-rw-r--r--drivers/misc/cxl/context.c2
-rw-r--r--drivers/misc/cxl/cxl.h3
-rw-r--r--drivers/misc/cxl/cxllib.c3
-rw-r--r--drivers/misc/cxl/file.c15
-rw-r--r--drivers/misc/cxl/native.c13
-rw-r--r--drivers/misc/cxl/pci.c2
-rw-r--r--drivers/misc/ocxl/Kconfig31
-rw-r--r--drivers/misc/ocxl/Makefile11
-rw-r--r--drivers/misc/ocxl/afu_irq.c202
-rw-r--r--drivers/misc/ocxl/config.c723
-rw-r--r--drivers/misc/ocxl/context.c279
-rw-r--r--drivers/misc/ocxl/file.c432
-rw-r--r--drivers/misc/ocxl/link.c647
-rw-r--r--drivers/misc/ocxl/main.c33
-rw-r--r--drivers/misc/ocxl/ocxl_internal.h131
-rw-r--r--drivers/misc/ocxl/pasid.c107
-rw-r--r--drivers/misc/ocxl/pci.c585
-rw-r--r--drivers/misc/ocxl/sysfs.c142
-rw-r--r--drivers/misc/ocxl/trace.c6
-rw-r--r--drivers/misc/ocxl/trace.h182
-rw-r--r--drivers/pci/hotplug/rpadlpar_core.c13
-rw-r--r--drivers/pci/hotplug/rpadlpar_sysfs.c3
-rw-r--r--drivers/pci/hotplug/rpaphp.h8
-rw-r--r--drivers/pci/hotplug/rpaphp_core.c107
-rw-r--r--drivers/pci/iov.c11
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c3
-rw-r--r--drivers/ps3/ps3av.c11
-rw-r--r--drivers/rtc/rtc-opal.c12
-rw-r--r--include/linux/pci.h38
-rw-r--r--include/misc/ocxl-config.h45
-rw-r--r--include/misc/ocxl.h214
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/misc/cxl.h10
-rw-r--r--include/uapi/misc/ocxl.h49
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c491
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/mmap_bench.c53
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/mm/segv_errors.c78
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c4
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c329
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c43
320 files changed, 11609 insertions, 3056 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-ocxl b/Documentation/ABI/testing/sysfs-class-ocxl
new file mode 100644
index 000000000000..b5b1fa197592
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -0,0 +1,35 @@
+What: /sys/class/ocxl/<afu name>/afu_version
+Date: January 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Version of the AFU, in the format <major>:<minor>
+ Reflects what is read in the configuration space of the AFU
+
+What: /sys/class/ocxl/<afu name>/contexts
+Date: January 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Number of contexts for the AFU, in the format <n>/<max>
+ where:
+ n: number of currently active contexts, for debug
+ max: maximum number of contexts supported by the AFU
+
+What: /sys/class/ocxl/<afu name>/pp_mmio_size
+Date: January 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Size of the per-process mmio area, as defined in the
+ configuration space of the AFU
+
+What: /sys/class/ocxl/<afu name>/global_mmio_size
+Date: January 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Size of the global mmio area, as defined in the
+ configuration space of the AFU
+
+What: /sys/class/ocxl/<afu name>/global_mmio_area
+Date: January 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Give access the global mmio area for the AFU
diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
new file mode 100644
index 000000000000..4f7af841d935
--- /dev/null
+++ b/Documentation/accelerators/ocxl.rst
@@ -0,0 +1,160 @@
+========================================================
+OpenCAPI (Open Coherent Accelerator Processor Interface)
+========================================================
+
+OpenCAPI is an interface between processors and accelerators. It aims
+at being low-latency and high-bandwidth. The specification is
+developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
+
+It allows an accelerator (which could be a FPGA, ASICs, ...) to access
+the host memory coherently, using virtual addresses. An OpenCAPI
+device can also host its own memory, that can be accessed from the
+host.
+
+OpenCAPI is known in linux as 'ocxl', as the open, processor-agnostic
+evolution of 'cxl' (the driver for the IBM CAPI interface for
+powerpc), which was named that way to avoid confusion with the ISDN
+CAPI subsystem.
+
+
+High-level view
+===============
+
+OpenCAPI defines a Data Link Layer (DL) and Transaction Layer (TL), to
+be implemented on top of a physical link. Any processor or device
+implementing the DL and TL can start sharing memory.
+
+::
+
+ +-----------+ +-------------+
+ | | | |
+ | | | Accelerated |
+ | Processor | | Function |
+ | | +--------+ | Unit | +--------+
+ | |--| Memory | | (AFU) |--| Memory |
+ | | +--------+ | | +--------+
+ +-----------+ +-------------+
+ | |
+ +-----------+ +-------------+
+ | TL | | TLX |
+ +-----------+ +-------------+
+ | |
+ +-----------+ +-------------+
+ | DL | | DLX |
+ +-----------+ +-------------+
+ | |
+ | PHY |
+ +---------------------------------------+
+
+
+
+Device discovery
+================
+
+OpenCAPI relies on a PCI-like configuration space, implemented on the
+device. So the host can discover AFUs by querying the config space.
+
+OpenCAPI devices in Linux are treated like PCI devices (with a few
+caveats). The firmware is expected to abstract the hardware as if it
+was a PCI link. A lot of the existing PCI infrastructure is reused:
+devices are scanned and BARs are assigned during the standard PCI
+enumeration. Commands like 'lspci' can therefore be used to see what
+devices are available.
+
+The configuration space defines the AFU(s) that can be found on the
+physical adapter, such as its name, how many memory contexts it can
+work with, the size of its MMIO areas, ...
+
+
+
+MMIO
+====
+
+OpenCAPI defines two MMIO areas for each AFU:
+
+* the global MMIO area, with registers pertinent to the whole AFU.
+* a per-process MMIO area, which has a fixed size for each context.
+
+
+
+AFU interrupts
+==============
+
+OpenCAPI includes the possibility for an AFU to send an interrupt to a
+host process. It is done through a 'intrp_req' defined in the
+Transaction Layer, specifying a 64-bit object handle which defines the
+interrupt.
+
+The driver allows a process to allocate an interrupt and obtain its
+64-bit object handle, that can be passed to the AFU.
+
+
+
+char devices
+============
+
+The driver creates one char device per AFU found on the physical
+device. A physical device may have multiple functions and each
+function can have multiple AFUs. At the time of this writing though,
+it has only been tested with devices exporting only one AFU.
+
+Char devices can be found in /dev/ocxl/ and are named as:
+/dev/ocxl/<AFU name>.<location>.<index>
+
+where <AFU name> is a max 20-character long name, as found in the
+config space of the AFU.
+<location> is added by the driver and can help distinguish devices
+when a system has more than one instance of the same OpenCAPI device.
+<index> is also to help distinguish AFUs in the unlikely case where a
+device carries multiple copies of the same AFU.
+
+
+
+Sysfs class
+===========
+
+An ocxl class is added for the devices representing the AFUs. See
+/sys/class/ocxl. The layout is described in
+Documentation/ABI/testing/sysfs-class-ocxl
+
+
+
+User API
+========
+
+open
+----
+
+Based on the AFU definition found in the config space, an AFU may
+support working with more than one memory context, in which case the
+associated char device may be opened multiple times by different
+processes.
+
+
+ioctl
+-----
+
+OCXL_IOCTL_ATTACH:
+
+ Attach the memory context of the calling process to the AFU so that
+ the AFU can access its memory.
+
+OCXL_IOCTL_IRQ_ALLOC:
+
+ Allocate an AFU interrupt and return an identifier.
+
+OCXL_IOCTL_IRQ_FREE:
+
+ Free a previously allocated AFU interrupt.
+
+OCXL_IOCTL_IRQ_SET_FD:
+
+ Associate an event fd to an AFU interrupt so that the user process
+ can be notified when the AFU sends an interrupt.
+
+
+mmap
+----
+
+A process can mmap the per-process MMIO area for interactions with the
+AFU.
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 417f91110010..e86bd2f64117 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -1309,7 +1309,7 @@ number and level/sense information. All interrupt children in an
OpenPIC interrupt domain use 2 cells per interrupt in their interrupts
property.
-The PCI bus binding specifies a #interrupt-cell value of 1 to encode
+The PCI bus binding specifies a #interrupt-cells value of 1 to encode
which interrupt pin (INTA,INTB,INTC,INTD) is used.
2) interrupt-parent property
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 3be3b266be41..70cb68bed2e8 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -46,7 +46,6 @@ stall the CPU for an extended period, you should also not attempt to
implement direct_access.
These block devices may be used for inspiration:
-- axonram: Axon DDR2 device driver
- brd: RAM backed block device driver
- dcssblk: s390 dcss block device driver
- pmem: NVDIMM persistent memory driver
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 3e3fdae5f3ed..6501389d55b9 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -326,6 +326,7 @@ Code Seq#(hex) Include File Comments
0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org>
0xC0 00-0F linux/usb/iowarrior.h
0xCA 00-0F uapi/misc/cxl.h
+0xCA 10-2F uapi/misc/ocxl.h
0xCA 80-BF uapi/scsi/cxlflash_ioctl.h
0xCB 00-1F CBM serial IEC bus in development:
<mailto:michael.klein@puffin.lb.shuttle.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index f981c5678eeb..1dc846d0add6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9909,6 +9909,18 @@ M: Josh Poimboeuf <jpoimboe@redhat.com>
S: Supported
F: tools/objtool/
+OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
+M: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+M: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+L: linuxppc-dev@lists.ozlabs.org
+S: Supported
+F: arch/powerpc/platforms/powernv/ocxl.c
+F: arch/powerpc/include/asm/pnv-ocxl.h
+F: drivers/misc/ocxl/
+F: include/misc/ocxl*
+F: include/uapi/misc/ocxl.h
+F: Documentation/accelerators/ocxl.txt
+
OMAP AUDIO SUPPORT
M: Peter Ujfalusi <peter.ujfalusi@ti.com>
M: Jarkko Nikula <jarkko.nikula@bitmer.com>
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 73fcf592ee91..9d3329811cc1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -143,6 +143,7 @@ config PPC
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -150,6 +151,7 @@ config PPC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
@@ -180,8 +182,6 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
- select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
- select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
@@ -868,6 +868,21 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
+config PPC_MEM_KEYS
+ prompt "PowerPC Memory Protection Keys"
+ def_bool y
+ depends on PPC_BOOK3S_64
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ Memory Protection Keys provides a mechanism for enforcing
+ page-based protections, but without requiring modification of the
+ page tables when an application changes protection domains.
+
+ For details, see Documentation/vm/protection-keys.txt
+
+ If unsure, say y.
+
endmenu
config ISA_DMA_API
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 657c33cd4eee..c45424c64e19 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST
depends on DEBUG_KERNEL
default n
+config PPC_IRQ_SOFT_MASK_DEBUG
+ bool "Include extra checks for powerpc irq soft masking"
+ default n
+
config XMON
bool "Include xmon kernel debugger"
depends on DEBUG_KERNEL
@@ -368,7 +372,7 @@ config PPC_PTDUMP
config PPC_HTDUMP
def_bool y
- depends on PPC_PTDUMP && PPC_BOOK3S
+ depends on PPC_PTDUMP && PPC_BOOK3S_64
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 1381693a4a51..ccd2556bdb53 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -63,6 +63,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 08782f55b89f..ef6549e57157 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -108,10 +108,10 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
- oflib.c ofconsole.c cuboot.c cpm-serial.c \
- uartlite.c opal.c
+ oflib.c ofconsole.c cuboot.c
+
src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
-src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
+src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
ifndef CONFIG_PPC64_BOOT_WRAPPER
src-wlib-y += crtsavres.S
endif
@@ -120,6 +120,8 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c
+src-wlib-$(CONFIG_CPM) += cpm-serial.c
src-plat-y := of.c epapr.c
src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
index bf81b8f9704c..187ce458d03a 100644
--- a/arch/powerpc/boot/dts/a3m071.dts
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -105,24 +105,24 @@
reg = <0 0x0 0x02000000>;
compatible = "cfi-flash";
bank-width = <2>;
- partition@0x0 {
+ partition@0 {
label = "u-boot";
reg = <0x00000000 0x00040000>;
read-only;
};
- partition@0x00040000 {
+ partition@40000 {
label = "env";
reg = <0x00040000 0x00020000>;
};
- partition@0x00060000 {
+ partition@60000 {
label = "dtb";
reg = <0x00060000 0x00020000>;
};
- partition@0x00080000 {
+ partition@80000 {
label = "kernel";
reg = <0x00080000 0x00500000>;
};
- partition@0x00580000 {
+ partition@580000 {
label = "root";
reg = <0x00580000 0x00A80000>;
};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index e61d5dc598c1..746779202a12 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -216,7 +216,7 @@
interrupts = <39 2>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x0 0x00000020>;
interrupt-parent = <&MPIC>;
@@ -229,7 +229,7 @@
};
};
- IIC1: i2c@00000100 {
+ IIC1: i2c@100 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x100 0x00000020>;
interrupt-parent = <&MPIC>;
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 1e32903cb0a8..27f169e3ade9 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -276,14 +276,14 @@
>;
};
- cpu-error@0070 {
+ cpu-error@70 {
compatible = "marvell,mv64360-cpu-error";
reg = <0x0070 0x10 0x0128 0x28>;
interrupts = <3>;
interrupt-parent = <&PIC>;
};
- sram-ctrl@0380 {
+ sram-ctrl@380 {
compatible = "marvell,mv64360-sram-ctrl";
reg = <0x0380 0x80>;
interrupts = <13>;
@@ -311,7 +311,7 @@
interrupt-parent = <&PIC>;
};
/* Devices attached to the device controller */
- devicebus@045c {
+ devicebus@45c {
#address-cells = <2>;
#size-cells = <1>;
compatible = "marvell,mv64306-devctrl";
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index 4191e1850ea1..f2ad5815f08d 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -108,7 +108,7 @@
reg = <0x50000000 0x4>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-currituck", "ibm,iic";
reg = <0x0 0x00000014>;
interrupt-parent = <&MPIC>;
diff --git a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
index 01706a339603..bc3e8039bdc7 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
@@ -126,7 +126,7 @@
par_io@e0100 {
num-ports = <7>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x4 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -154,7 +154,7 @@
0x1 0x1f 0x2 0x0 0x3 0x0>; /* GTX125 */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x5 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -228,22 +228,22 @@
/* These are the same PHYs as on
* gianfar's MDIO bus */
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
index 76b2bd6f7742..d8367ceddea6 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
@@ -141,7 +141,7 @@
gpio-controller;
};
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -161,7 +161,7 @@
0x2 0x14 0x1 0x0 0x2 0x0>; /* ENET1_GTXCLK */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -181,7 +181,7 @@
0x2 0x2 0x1 0x0 0x2 0x0>; /* ENET2_GTXCLK */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -201,7 +201,7 @@
0x2 0x19 0x1 0x0 0x2 0x0>; /* ENET3_GTXCLK */
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -272,30 +272,30 @@
reg = <0x2120 0x18>;
compatible = "fsl,ucc-mdio";
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <3 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <4 1 0 0>;
reg = <0x3>;
};
- qe_phy5: ethernet-phy@04 {
+ qe_phy5: ethernet-phy@4 {
reg = <0x04>;
};
- qe_phy7: ethernet-phy@06 {
+ qe_phy7: ethernet-phy@6 {
reg = <0x6>;
};
tbi1: tbi-phy@11 {
diff --git a/arch/powerpc/boot/dts/fsl/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts
index 291454c75dda..1047802f4d2a 100644
--- a/arch/powerpc/boot/dts/fsl/p1021mds.dts
+++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts
@@ -202,7 +202,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -225,7 +225,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -296,7 +296,7 @@
interrupts = <4 1 0 0>;
reg = <0x0>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
index d44bb12debb0..0a5434a631c3 100644
--- a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
@@ -245,7 +245,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -268,7 +268,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -283,7 +283,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -293,7 +293,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
index b15acbaea34b..ea33b57f8774 100644
--- a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
@@ -106,7 +106,7 @@
interrupts = <4 1 0 0>;
reg = <0x6>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
index 08816fb474f5..ab75b8f29ae2 100644
--- a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
@@ -172,7 +172,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -195,7 +195,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -210,7 +210,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -220,7 +220,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index 621f2c6ee6ad..65ff34c49025 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -61,7 +61,7 @@
};
mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
index fcd2aeb5b8ac..4fa15f48a4c3 100644
--- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -77,23 +77,23 @@
};
mdio0: mdio@fc000 {
- phy_sgmii_0: ethernet-phy@02 {
+ phy_sgmii_0: ethernet-phy@2 {
reg = <0x02>;
};
- phy_sgmii_1: ethernet-phy@03 {
+ phy_sgmii_1: ethernet-phy@3 {
reg = <0x03>;
};
- phy_sgmii_2: ethernet-phy@01 {
+ phy_sgmii_2: ethernet-phy@1 {
reg = <0x01>;
};
- phy_rgmii_0: ethernet-phy@04 {
+ phy_rgmii_0: ethernet-phy@4 {
reg = <0x04>;
};
- phy_rgmii_1: ethernet-phy@05 {
+ phy_rgmii_1: ethernet-phy@5 {
reg = <0x05>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
index 2c138627b1b4..3ebb712224cb 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -59,7 +59,7 @@
};
mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index 5fdddbd2a62b..099a598c74c0 100644
--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -148,15 +148,15 @@
};
mdio0: mdio@fc000 {
- phy_sgmii_2: ethernet-phy@03 {
+ phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
- phy_rgmii_0: ethernet-phy@01 {
+ phy_rgmii_0: ethernet-phy@1 {
reg = <0x01>;
};
- phy_rgmii_1: ethernet-phy@02 {
+ phy_rgmii_1: ethernet-phy@2 {
reg = <0x02>;
};
};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index f10a64aeb83b..6560283c5aec 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -583,21 +583,21 @@
};
};
- OHCI1: ohci@02040000 {
+ OHCI1: ohci@2040000 {
compatible = "ohci-le";
reg = <0x02040000 0xa0>;
interrupt-parent = <&UIC1_3>;
interrupts = <28 0x8 29 0x8>;
};
- OHCI2: ohci@02080000 {
+ OHCI2: ohci@2080000 {
compatible = "ohci-le";
reg = <0x02080000 0xa0>;
interrupt-parent = <&UIC1_3>;
interrupts = <30 0x8 31 0x8>;
};
- EHCI: ehci@02000000 {
+ EHCI: ehci@2000000 {
compatible = "usb-ehci";
reg = <0x02000000 0xa4>;
interrupt-parent = <&UIC1_3>;
diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts
index ef3be0e58b02..58d06c9ee08b 100644
--- a/arch/powerpc/boot/dts/gamecube.dts
+++ b/arch/powerpc/boot/dts/gamecube.dts
@@ -54,13 +54,13 @@
ranges = <0x0c000000 0x0c000000 0x00010000>;
interrupt-parent = <&PIC>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -71,7 +71,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,flipper-dsp";
@@ -84,26 +84,26 @@
};
};
- disk@0c006000 {
+ disk@c006000 {
compatible = "nintendo,flipper-di";
reg = <0x0c006000 0x40>;
interrupts = <2>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,flipper-ai";
reg = <0x0c006c00 0x20>;
interrupts = <6>;
};
- gamepad-controller@0c006400 {
+ gamepad-controller@c006400 {
compatible = "nintendo,flipper-si";
reg = <0x0c006400 0x100>;
interrupts = <3>;
};
/* External Interface bus */
- exi@0c006800 {
+ exi@c006800 {
compatible = "nintendo,flipper-exi";
reg = <0x0c006800 0x40>;
virtual-reg = <0x0c006800>;
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
index 2b256694eca6..cb16dad43c92 100644
--- a/arch/powerpc/boot/dts/haleakala.dts
+++ b/arch/powerpc/boot/dts/haleakala.dts
@@ -237,7 +237,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index 5ba7f01e2a29..2a3413221cc1 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -322,7 +322,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -363,7 +363,7 @@
0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@0c0000000 {
+ PCIE1: pciex@c0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
index 983aee185793..9fa33d9ba966 100644
--- a/arch/powerpc/boot/dts/kmeter1.dts
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -434,27 +434,27 @@
compatible = "fsl,ucc-mdio";
/* Piggy2 (UCC4, MDIO 0x00, RMII) */
- phy_piggy2: ethernet-phy@00 {
+ phy_piggy2: ethernet-phy@0 {
reg = <0x0>;
};
/* Eth-1 (UCC5, MDIO 0x08, RMII) */
- phy_eth1: ethernet-phy@08 {
+ phy_eth1: ethernet-phy@8 {
reg = <0x08>;
};
/* Eth-2 (UCC6, MDIO 0x09, RMII) */
- phy_eth2: ethernet-phy@09 {
+ phy_eth2: ethernet-phy@9 {
reg = <0x09>;
};
/* Eth-3 (UCC7, MDIO 0x0a, RMII) */
- phy_eth3: ethernet-phy@0a {
+ phy_eth3: ethernet-phy@a {
reg = <0x0a>;
};
/* Eth-4 (UCC8, MDIO 0x0b, RMII) */
- phy_eth4: ethernet-phy@0b {
+ phy_eth4: ethernet-phy@b {
reg = <0x0b>;
};
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
index 63d48b632c84..bf8fe1629392 100644
--- a/arch/powerpc/boot/dts/makalu.dts
+++ b/arch/powerpc/boot/dts/makalu.dts
@@ -268,7 +268,7 @@
};
};
- PCIE0: pciex@0a0000000 {
+ PCIE0: pciex@a0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -309,7 +309,7 @@
0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@0c0000000 {
+ PCIE1: pciex@c0000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts
index 0793cdf0d46e..49c7d657118a 100644
--- a/arch/powerpc/boot/dts/mpc832x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc832x_mds.dts
@@ -186,7 +186,7 @@
device_type = "par_io";
num-ports = <7>;
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 4 3 0 2 0 /* MDIO */
@@ -208,7 +208,7 @@
1 12 1 0 1 0 /* TX_EN */
1 13 2 0 1 0>; /* CRS */
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 31 2 0 1 0 /* RX_CLK (CLK7) */
@@ -228,7 +228,7 @@
1 30 1 0 1 0 /* TX_EN */
1 31 2 0 1 0>; /* CRS */
};
- pio5: ucc_pin@05 {
+ pio5: ucc_pin@5 {
pio-map = <
/*
* open has
@@ -352,12 +352,12 @@
reg = <0x2320 0x18>;
compatible = "fsl,ucc-mdio";
- phy3: ethernet-phy@03 {
+ phy3: ethernet-phy@3 {
interrupt-parent = <&ipic>;
interrupts = <17 0x8>;
reg = <0x3>;
};
- phy4: ethernet-phy@04 {
+ phy4: ethernet-phy@4 {
interrupt-parent = <&ipic>;
interrupts = <18 0x8>;
reg = <0x4>;
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
index 91df1eb16667..647cae14c16d 100644
--- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -175,7 +175,7 @@
gpio-controller;
};
- ucc2pio:ucc_pin@02 {
+ ucc2pio:ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 4 3 0 2 0 /* MDIO */
@@ -197,7 +197,7 @@
0 30 1 0 1 0 /* TX_EN */
0 31 2 0 1 0>; /* CRS */
};
- ucc3pio:ucc_pin@03 {
+ ucc3pio:ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 13 2 0 1 0 /* RX_CLK (CLK9) */
@@ -310,12 +310,12 @@
reg = <0x3120 0x18>;
compatible = "fsl,ucc-mdio";
- phy00:ethernet-phy@00 {
+ phy00:ethernet-phy@0 {
interrupt-parent = <&ipic>;
interrupts = <0>;
reg = <0x0>;
};
- phy04:ethernet-phy@04 {
+ phy04:ethernet-phy@4 {
interrupt-parent = <&ipic>;
interrupts = <0>;
reg = <0x4>;
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
index ecb6ccd3a6aa..539fd9f72eda 100644
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc836x_mds.dts
@@ -228,7 +228,7 @@
gpio-controller;
};
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 3 1 0 1 0 /* TxD0 */
@@ -255,7 +255,7 @@
2 9 1 0 3 0 /* GTX_CLK - CLK10 */
2 8 2 0 1 0>; /* GTX125 - CLK9 */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 17 1 0 1 0 /* TxD0 */
@@ -393,12 +393,12 @@
reg = <0x2120 0x18>;
compatible = "fsl,ucc-mdio";
- phy0: ethernet-phy@00 {
+ phy0: ethernet-phy@0 {
interrupt-parent = <&ipic>;
interrupts = <17 0x8>;
reg = <0x0>;
};
- phy1: ethernet-phy@01 {
+ phy1: ethernet-phy@1 {
interrupt-parent = <&ipic>;
interrupts = <18 0x8>;
reg = <0x1>;
diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts
index 0b38a0defd2c..8967a56adad4 100644
--- a/arch/powerpc/boot/dts/sbc8548-altflash.dts
+++ b/arch/powerpc/boot/dts/sbc8548-altflash.dts
@@ -40,12 +40,12 @@
compatible = "intel,JS28F128", "cfi-flash";
bank-width = <4>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* FC000000 -> FFEFFFFF */
reg = <0x00000000 0x03f00000>;
};
- partition@0x03f00000 {
+ partition@3f00000 {
label = "bootloader";
/* FFF00000 -> FFFFFFFF */
reg = <0x03f00000 0x00100000>;
@@ -95,12 +95,12 @@
reg = <0x6 0x0 0x800000>;
bank-width = <1>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* EF800000 -> EFF9FFFF */
reg = <0x00000000 0x007a0000>;
};
- partition@0x7a0000 {
+ partition@7a0000 {
label = "bootloader";
/* EFFA0000 -> EFFFFFFF */
reg = <0x007a0000 0x00060000>;
diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts
index 1df2a0955668..9bdb828a504e 100644
--- a/arch/powerpc/boot/dts/sbc8548.dts
+++ b/arch/powerpc/boot/dts/sbc8548.dts
@@ -38,12 +38,12 @@
reg = <0x0 0x0 0x800000>;
bank-width = <1>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* FF800000 -> FFF9FFFF */
reg = <0x00000000 0x007a0000>;
};
- partition@0x7a0000 {
+ partition@7a0000 {
label = "bootloader";
/* FFFA0000 -> FFFFFFFF */
reg = <0x007a0000 0x00060000>;
@@ -92,12 +92,12 @@
compatible = "intel,JS28F128", "cfi-flash";
bank-width = <4>;
device-width = <1>;
- partition@0x0 {
+ partition@0 {
label = "space";
/* EC000000 -> EFEFFFFF */
reg = <0x00000000 0x03f00000>;
};
- partition@0x03f00000 {
+ partition@3f00000 {
label = "bootloader";
/* EFF00000 -> EFFFFFFF */
reg = <0x03f00000 0x00100000>;
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 77528c9a8dbd..17a5babb098d 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -65,14 +65,14 @@
0x0d800000 0x0d800000 0x00800000>;
interrupt-parent = <&PIC0>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,hollywood-vi",
"nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,hollywood-pi",
"nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -84,7 +84,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,hollywood-dsp",
@@ -93,14 +93,14 @@
interrupts = <6>;
};
- gamepad-controller@0d006400 {
+ gamepad-controller@d006400 {
compatible = "nintendo,hollywood-si",
"nintendo,flipper-si";
reg = <0x0d006400 0x100>;
interrupts = <3>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,hollywood-ai",
"nintendo,flipper-ai";
reg = <0x0d006c00 0x20>;
@@ -108,7 +108,7 @@
};
/* External Interface bus */
- exi@0d006800 {
+ exi@d006800 {
compatible = "nintendo,hollywood-exi",
"nintendo,flipper-exi";
reg = <0x0d006800 0x40>;
@@ -116,7 +116,7 @@
interrupts = <4>;
};
- usb@0d040000 {
+ usb@d040000 {
compatible = "nintendo,hollywood-usb-ehci",
"usb-ehci";
reg = <0x0d040000 0x100>;
@@ -124,7 +124,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d050000 {
+ usb@d050000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d050000 0x100>;
@@ -132,7 +132,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d060000 {
+ usb@d060000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d060000 0x100>;
@@ -140,7 +140,7 @@
interrupt-parent = <&PIC1>;
};
- sd@0d070000 {
+ sd@d070000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d070000 0x200>;
@@ -148,7 +148,7 @@
interrupt-parent = <&PIC1>;
};
- sdio@0d080000 {
+ sdio@d080000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d080000 0x200>;
@@ -156,14 +156,14 @@
interrupt-parent = <&PIC1>;
};
- ipc@0d000000 {
+ ipc@d000000 {
compatible = "nintendo,hollywood-ipc";
reg = <0x0d000000 0x10>;
interrupts = <30>;
interrupt-parent = <&PIC1>;
};
- PIC1: pic1@0d800030 {
+ PIC1: pic1@d800030 {
#interrupt-cells = <1>;
compatible = "nintendo,hollywood-pic";
reg = <0x0d800030 0x10>;
@@ -171,7 +171,7 @@
interrupts = <14>;
};
- GPIO: gpio@0d8000c0 {
+ GPIO: gpio@d8000c0 {
#gpio-cells = <2>;
compatible = "nintendo,hollywood-gpio";
reg = <0x0d8000c0 0x40>;
@@ -203,12 +203,12 @@
*/
};
- control@0d800100 {
+ control@d800100 {
compatible = "nintendo,hollywood-control";
reg = <0x0d800100 0x300>;
};
- disk@0d806000 {
+ disk@d806000 {
compatible = "nintendo,hollywood-di";
reg = <0x0d806000 0x40>;
interrupts = <2>;
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 7b5c02b1afd0..88955095ec07 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -124,20 +124,26 @@ int serial_console_init(void)
else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
rc = mpsc_console_init(devp, &serial_cd);
#endif
+#ifdef CONFIG_CPM
else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
rc = cpm_console_init(devp, &serial_cd);
+#endif
#ifdef CONFIG_PPC_MPC52XX
else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
rc = mpc5200_psc_console_init(devp, &serial_cd);
#endif
+#ifdef CONFIG_XILINX_VIRTEX
else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
rc = uartlite_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
rc = opal_console_init(devp, &serial_cd);
+#endif
/* Add other serial console driver calls here */
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index f1f176c29fa3..5320735395e7 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -13,7 +13,6 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_MPC86XADS=y
CONFIG_8xx_COPYBACK=y
-CONFIG_8xx_CPU6=y
CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
CONFIG_MATH_EMULATION=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 73dab7a37386..9e92aa6a52ba 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -96,6 +96,7 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=y
CONFIG_IDE=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_GENERIC=y
@@ -112,6 +113,7 @@ CONFIG_SCSI_CXGB3_ISCSI=m
CONFIG_SCSI_CXGB4_ISCSI=m
CONFIG_SCSI_BNX2_ISCSI=m
CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=y
CONFIG_SCSI_MPT2SAS=m
CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 016579ef16d3..30a155c0a6b0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -311,6 +311,29 @@ static inline int pte_present(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
}
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /*
+ * A read-only access is controlled by _PAGE_USER bit.
+ * We have _PAGE_READ set for WRITE and EXECUTE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ return true;
+}
+
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 2d9df40446f6..949d691094a4 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -17,6 +17,12 @@
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE)
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+#define H_PAGE_F_GIX_SHIFT 53
+#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */
+
/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
H_PAGE_F_SECOND | H_PAGE_F_GIX)
@@ -49,6 +55,20 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
}
#endif
+/*
+ * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
+ * a matter of returning the PTE bits that need to be modified. On 64K PTE,
+ * things are a little more involved and hence needs many more parameters to
+ * accomplish the same. However we want to abstract this out from the caller by
+ * keeping the prototype consistent across the two formats.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index, unsigned long hidx)
+{
+ return (hidx << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline char *get_hpte_slot_array(pmd_t *pmdp)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index cb46d1034f33..338b7da468ce 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -13,21 +13,17 @@
*/
#define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */
#define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */
+#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
*/
#define H_PAGE_THP_HUGE H_PAGE_4K_PFN
-/*
- * Used to track subpage group valid if H_PAGE_COMBO is set
- * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND
- */
-#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND)
-
/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \
- H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO)
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
/*
* we support 16 fragments per PTE page of 64K size.
*/
@@ -55,24 +51,57 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
unsigned long *hidxp;
rpte.pte = pte;
- rpte.hidx = 0;
- if (pte_val(pte) & H_PAGE_COMBO) {
- /*
- * Make sure we order the hidx load against the H_PAGE_COMBO
- * check. The store side ordering is done in __hash_page_4K
- */
- smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
- rpte.hidx = *hidxp;
- }
+
+ /*
+ * Ensure that we do not read the hidx before we read the PTE. Because
+ * the writer side is expected to finish writing the hidx first followed
+ * by the PTE, by using smp_wmb(). pte_set_hash_slot() ensures that.
+ */
+ smp_rmb();
+
+ hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ rpte.hidx = *hidxp;
return rpte;
}
+/*
+ * shift the hidx representation by one-modulo-0xf; i.e hidx 0 is respresented
+ * as 1, 1 as 2,... , and 0xf as 0. This convention lets us represent a
+ * invalid hidx 0xf with a 0x0 bit value. PTEs are anyway zero'd when
+ * allocated. We dont have to zero them gain; thus save on the initialization.
+ */
+#define HIDX_UNSHIFT_BY_ONE(x) ((x + 0xfUL) & 0xfUL) /* shift backward by one */
+#define HIDX_SHIFT_BY_ONE(x) ((x + 0x1UL) & 0xfUL) /* shift forward by one */
+#define HIDX_BITS(x, index) (x << (index << 2))
+#define BITS_TO_HIDX(x, index) ((x >> (index << 2)) & 0xfUL)
+#define INVALID_RPTE_HIDX 0x0UL
+
static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
{
- if ((pte_val(rpte.pte) & H_PAGE_COMBO))
- return (rpte.hidx >> (index<<2)) & 0xf;
- return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf;
+ return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index));
+}
+
+/*
+ * Commit the hidx and return PTE bits that needs to be modified. The caller is
+ * expected to modify the PTE bits accordingly and commit the PTE to memory.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index, unsigned long hidx)
+{
+ unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+ rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
+ *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
+
+ /*
+ * Anyone reading PTE must ensure hidx bits are read after reading the
+ * PTE by using the read-side barrier smp_rmb(). __real_pte() can be
+ * used for that.
+ */
+ smp_wmb();
+
+ /* No PTE bits to be modified, return 0x0UL */
+ return 0x0UL;
}
#define __rpte_to_pte(r) ((r).pte)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index ecb1239d74f4..0920eff731b3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -9,11 +9,6 @@
*
*/
#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS
-#define H_PAGE_F_GIX_SHIFT 56
-#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
-#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */
-#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44)
-#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
@@ -167,6 +162,9 @@ static inline int hash__pte_none(pte_t pte)
return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
}
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index);
+
/* This low level function performs the actual PTE insertion
* Setting the PTE depends on the MMU type and other factors. It's
* an horrible mess that I'm not going to try to clean up now but
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e91e115a816f..50ed64fba4ae 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -90,6 +90,8 @@
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
@@ -104,6 +106,9 @@
#define HPTE_R_C ASM_CONST(0x0000000000000080)
#define HPTE_R_R ASM_CONST(0x0000000000000100)
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200)
#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c9448e19847a..0abeb0e2d616 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -108,6 +108,16 @@ typedef struct {
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct list_head iommu_group_mem_list;
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * Each bit represents one protection key.
+ * bit set -> key allocated
+ * bit unset -> key available for allocation
+ */
+ u32 pkey_allocation_map;
+ s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
} mm_context_t;
/*
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6ca1208cedcb..51017726d495 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -14,8 +14,9 @@
*/
#define _PAGE_BIT_SWAP_TYPE 0
+#define _PAGE_NA 0
#define _PAGE_RO 0
-#define _PAGE_SHARED 0
+#define _PAGE_USER 0
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
@@ -39,6 +40,7 @@
#define _RPAGE_RSV2 0x0800000000000000UL
#define _RPAGE_RSV3 0x0400000000000000UL
#define _RPAGE_RSV4 0x0200000000000000UL
+#define _RPAGE_RSV5 0x00040UL
#define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
#define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
@@ -58,6 +60,25 @@
/* Max physical address bit as per radix table */
#define _RPAGE_PA_MAX 57
+#ifdef CONFIG_PPC_MEM_KEYS
+#ifdef CONFIG_PPC_64K_PAGES
+#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
+#else /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */
+#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */
+#endif /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+#else /* CONFIG_PPC_MEM_KEYS */
+#define H_PTE_PKEY_BIT0 0
+#define H_PTE_PKEY_BIT1 0
+#define H_PTE_PKEY_BIT2 0
+#define H_PTE_PKEY_BIT3 0
+#define H_PTE_PKEY_BIT4 0
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* Max physical address bit we will use for now.
*
@@ -121,13 +142,16 @@
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
_PAGE_SOFT_DIRTY)
+
+#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
+ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
/*
* Mask of bits returned by pte_pgprot()
*/
#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
_PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | H_PTE_PKEY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
@@ -546,6 +570,40 @@ static inline int pte_present(pte_t pte)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /* Also check for pte_user */
+ unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
+ /*
+ * _PAGE_READ is needed for any access and will be
+ * cleared for PROT_NONE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ if ((pteval & clear_pte_bits) == clear_pte_bits)
+ return false;
+
+ return arch_pte_access_permitted(pte_val(pte), write, 0);
+}
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -850,6 +908,11 @@ static inline int pud_bad(pud_t pud)
return hash__pud_bad(pud);
}
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+ return pte_access_permitted(pud_pte(pud), write);
+}
#define pgd_write(pgd) pte_write(pgd_pte(pgd))
static inline void pgd_set(pgd_t *pgdp, unsigned long val)
@@ -889,6 +952,12 @@ static inline int pgd_bad(pgd_t pgd)
return hash__pgd_bad(pgd);
}
+#define pgd_access_permitted pgd_access_permitted
+static inline bool pgd_access_permitted(pgd_t pgd, bool write)
+{
+ return pte_access_permitted(pgd_pte(pgd), write);
+}
+
extern struct page *pgd_page(pgd_t pgd);
/* Pointers in the page table tree are physical addresses */
@@ -1009,6 +1078,12 @@ static inline int pmd_protnone(pmd_t pmd)
#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+ return pte_access_permitted(pmd_pte(pmd), write);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 849ecaae9e79..64d02a704bcb 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -51,6 +51,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
#define arch_flush_lazy_mmu_mode() do {} while (0)
+extern void hash__tlbiel_all(unsigned int action);
extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
int ssize, unsigned long flags);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 6a9e68003387..8eea90f80e45 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -11,6 +11,12 @@ static inline int mmu_get_ap(int psize)
return mmu_psize_defs[psize].ap;
}
+#ifdef CONFIG_PPC_RADIX_MMU
+extern void radix__tlbiel_all(unsigned int action);
+#else
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
+#endif
+
extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@ -47,4 +53,5 @@ extern void radix__flush_tlb_lpid(unsigned long lpid);
extern void radix__flush_tlb_all(void);
extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
unsigned long address);
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 58b576f654b3..0cac17253513 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -8,6 +8,44 @@
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
+/* TLB flush actions. Used as argument to tlbiel_all() */
+enum {
+ TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
+ TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
+};
+
+#ifdef CONFIG_PPC_NATIVE
+static inline void tlbiel_all(void)
+{
+ /*
+ * This is used for host machine check and bootup.
+ *
+ * This uses early_radix_enabled and implementations use
+ * early_cpu_has_feature etc because that works early in boot
+ * and this is the machine check path which is not performance
+ * critical.
+ */
+ if (early_radix_enabled())
+ radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
+#else
+static inline void tlbiel_all(void) { BUG(); };
+#endif
+
+static inline void tlbiel_all_lpid(bool radix)
+{
+ /*
+ * This is used for guest machine check.
+ */
+ if (radix)
+ radix__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+}
+
+
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 3c04249bcf39..fd06dbe7d7d3 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,9 +133,11 @@ struct pt_regs;
extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int);
extern void die(const char *, struct pt_regs *, long);
extern bool die_will_crash(void);
-
+extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_end(void);
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index abef812de7f8..812535f40124 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -31,8 +31,10 @@ unsigned int create_cond_branch(const unsigned int *addr,
unsigned long target, int flags);
int patch_branch(unsigned int *addr, unsigned long target, int flags);
int patch_instruction(unsigned int *addr, unsigned int instr);
+int raw_patch_instruction(unsigned int *addr, unsigned int instr);
int instr_is_relative_branch(unsigned int instr);
+int instr_is_relative_link_branch(unsigned int instr);
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
unsigned long branch_target(const unsigned int *instr);
unsigned int translate_branch(const unsigned int *dest,
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index b925df1b87d0..4c24ea8209bb 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -166,6 +166,6 @@ static inline int cpm_command(u32 command, u8 opcode)
}
#endif /* CONFIG_CPM */
-int cpm2_gpiochip_add32(struct device_node *np);
+int cpm2_gpiochip_add32(struct device *dev);
#endif
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 3db821876d48..a116fe931789 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -605,5 +605,7 @@ enum cpm_clk {
};
int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int cpm1_gpiochip_add16(struct device *dev);
+int cpm1_gpiochip_add32(struct device *dev);
#endif /* __CPM1__ */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 0546663a98db..a2c5c95882cf 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -107,12 +107,6 @@ struct cpu_spec {
* called in real mode to handle SLB and TLB errors.
*/
long (*machine_check_early)(struct pt_regs *regs);
-
- /*
- * Processor specific routine to flush tlbs.
- */
- void (*flush_tlb)(unsigned int action);
-
};
extern struct cpu_spec *cur_cpu_spec;
@@ -133,12 +127,6 @@ extern void cpu_feature_keys_init(void);
static inline void cpu_feature_keys_init(void) { }
#endif
-/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
-enum {
- TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
- TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
-};
-
#endif /* __ASSEMBLY__ */
/* CPU kernel features */
@@ -207,7 +195,7 @@ enum {
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-/* Free LONG_ASM_CONST(0x0020000000000000) */
+#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
@@ -454,7 +442,7 @@ enum {
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
- CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
+ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -464,7 +452,7 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
@@ -476,7 +464,8 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
+ CPU_FTR_PKEY)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644
index 000000000000..ce242b9ea8c6
--- /dev/null
+++ b/arch/powerpc/include/asm/drmem.h
@@ -0,0 +1,102 @@
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+struct drmem_lmb {
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+};
+
+struct drmem_lmb_info {
+ struct drmem_lmb *lmbs;
+ int n_lmbs;
+ u32 lmb_size;
+};
+
+extern struct drmem_lmb_info *drmem_info;
+
+#define for_each_drmem_lmb_in_range(lmb, start, end) \
+ for ((lmb) = (start); (lmb) <= (end); (lmb)++)
+
+#define for_each_drmem_lmb(lmb) \
+ for_each_drmem_lmb_in_range((lmb), \
+ &drmem_info->lmbs[0], \
+ &drmem_info->lmbs[drmem_info->n_lmbs - 1])
+
+/*
+ * The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+ __be64 base_addr;
+ __be32 drc_index;
+ __be32 reserved;
+ __be32 aa_index;
+ __be32 flags;
+};
+
+/*
+ * Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+ u32 seq_lmbs;
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+} __packed;
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
+static inline u32 drmem_lmb_size(void)
+{
+ return drmem_info->lmb_size;
+}
+
+#define DRMEM_LMB_RESERVED 0x80000000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+ lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+ lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+ return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
+u64 drmem_lmb_memory_max(void);
+void __init walk_drmem_lmbs(struct device_node *dn,
+ void (*func)(struct drmem_lmb *, const __be32 **));
+int drmem_update_dt(void);
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+ void (*func)(struct drmem_lmb *, const __be32 **));
+#endif
+
+#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5161c37dd039..fd37cc101f4f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -214,6 +214,7 @@ struct eeh_ops {
int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
int (*next_error)(struct eeh_pe **pe);
int (*restore_config)(struct pci_dn *pdn);
+ int (*notify_resume)(struct pci_dn *pdn);
};
extern int eeh_subsystem_flags;
@@ -297,6 +298,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option);
int eeh_pe_configure(struct eeh_pe *pe);
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask);
+int eeh_restore_vf_config(struct pci_dn *pdn);
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7197b179c1b1..176dfb73d42c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -251,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,area+EX_R10(r13); /* save r10 - r12 */ \
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-#define __EXCEPTION_PROLOG_1(area, extra, vec) \
+#define __EXCEPTION_PROLOG_1_PRE(area) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
SAVE_CTR(r10, area); \
- mfcr r9; \
- extra(vec); \
+ mfcr r9;
+
+#define __EXCEPTION_PROLOG_1_POST(area) \
std r11,area+EX_R11(r13); \
std r12,area+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,area+EX_R13(r13)
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 will carry
+ * addition parameter called "bitmask" to support
+ * checking of the interrupt maskable level in the SOFTEN_TEST.
+ * Intended to be used in MASKABLE_EXCPETION_* macros.
+ */
+#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec, bitmask); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 is intended
+ * to be used in STD_EXCEPTION* macros
+ */
+#define _EXCEPTION_PROLOG_1(area, extra, vec) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
#define EXCEPTION_PROLOG_1(area, extra, vec) \
- __EXCEPTION_PROLOG_1(area, extra, vec)
+ _EXCEPTION_PROLOG_1(area, extra, vec)
#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
@@ -485,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mflr r9; /* Get LR, later save to stack */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
std r9,_LINK(r1); \
- lbz r10,PACASOFTIRQEN(r13); \
+ lbz r10,PACAIRQSOFTMASK(r13); \
mfspr r11,SPRN_XER; /* save XER in stackframe */ \
std r10,SOFTE(r1); \
std r11,_XER(r1); \
@@ -549,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE
+#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI
-#define __SOFTEN_TEST(h, vec) \
- lbz r10,PACASOFTIRQEN(r13); \
- cmpwi r10,0; \
+#define __SOFTEN_TEST(h, vec, bitmask) \
+ lbz r10,PACAIRQSOFTMASK(r13); \
+ andi. r10,r10,bitmask; \
li r10,SOFTEN_VALUE_##vec; \
- beq masked_##h##interrupt
+ bne masked_##h##interrupt
-#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec)
+#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask)
-#define SOFTEN_TEST_PR(vec) \
+#define SOFTEN_TEST_PR(vec, bitmask) \
KVMTEST(EXC_STD, vec); \
- _SOFTEN_TEST(EXC_STD, vec)
+ _SOFTEN_TEST(EXC_STD, vec, bitmask)
-#define SOFTEN_TEST_HV(vec) \
+#define SOFTEN_TEST_HV(vec, bitmask) \
KVMTEST(EXC_HV, vec); \
- _SOFTEN_TEST(EXC_HV, vec)
+ _SOFTEN_TEST(EXC_HV, vec, bitmask)
#define KVMTEST_PR(vec) \
KVMTEST(EXC_STD, vec)
@@ -572,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define KVMTEST_HV(vec) \
KVMTEST(EXC_HV, vec)
-#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
-#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
+#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask)
+#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask)
-#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
-#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
+ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \
+#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
-#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
-#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\
+ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_NOTEST_PR)
+ EXC_STD, SOFTEN_NOTEST_PR, bitmask)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD);
-#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 8645897472b1..511acfd7ab0d 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,8 @@
#define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
+#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000)
#ifndef __ASSEMBLY__
@@ -67,7 +69,8 @@ enum {
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
- FW_FEATURE_HPT_RESIZE,
+ FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+ FW_FEATURE_DRC_INFO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 456f9e7b8d83..5986d473722b 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -29,6 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index fdcff76e9a25..7e0e93f24cb7 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -178,7 +178,7 @@ name:
* TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
* TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
* TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON_* - virt, relocated common handlers
+ * EXC_COMMON - virt, relocated common handlers
*
* The EXC handlers are given a name, and branch to name_common, or the
* appropriate KVM or masking function. Vector handler verieties are as
@@ -211,7 +211,6 @@ name:
* EXC_COMMON_BEGIN/END - used to open-code the handler
* EXC_COMMON
* EXC_COMMON_ASYNC
- * EXC_COMMON_HV
*
* TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
* and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
@@ -269,14 +268,14 @@ name:
STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
EXC_VIRT_END(name, start, size);
-#define EXC_REAL_MASKABLE(name, start, size) \
+#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
EXC_REAL_BEGIN(name, start, size); \
- MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\
EXC_REAL_END(name, start, size);
-#define EXC_VIRT_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
EXC_VIRT_BEGIN(name, start, size); \
- MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\
EXC_VIRT_END(name, start, size);
#define EXC_REAL_HV(name, start, size) \
@@ -305,13 +304,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start);
+ __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask);
#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \
EXC_REAL_BEGIN(name, start, size); \
@@ -332,13 +331,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start);
+ __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask);
#define __EXC_VIRT_OOL(name, start, size) \
EXC_VIRT_BEGIN(name, start, size); \
@@ -356,13 +355,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask);
#define __EXC_VIRT_OOL_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
@@ -378,13 +377,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask);
#define TRAMP_KVM(area, n) \
TRAMP_KVM_BEGIN(do_kvm_##n); \
@@ -413,10 +412,6 @@ name:
EXC_COMMON_BEGIN(name); \
STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \
-#define EXC_COMMON_HV(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
static inline void wait_for_subcore_guest_exit(void) { }
static inline void wait_for_tb_resync(void) { }
#endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 14c9d44f355b..1a4847f67ea8 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -47,8 +47,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
{
BUG_ON(!hugepd_ok(hpd));
#ifdef CONFIG_PPC_8xx
- return (pte_t *)__va(hpd_val(hpd) &
- ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK));
+ return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
#else
return (pte_t *)((hpd_val(hpd) &
~HUGEPD_SHIFT_MASK) | PD_HUGE);
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3818fa0164f0..88e5e8f17e98 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -27,6 +27,15 @@
#define PACA_IRQ_DEC 0x08 /* Or FIT */
#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */
#define PACA_IRQ_HMI 0x20
+#define PACA_IRQ_PMI 0x40
+
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED 0
+#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED 2
+#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
#endif /* CONFIG_PPC64 */
@@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long irq_soft_mask_return(void)
{
unsigned long flags;
asm volatile(
"lbz %0,%1(13)"
: "=r" (flags)
- : "i" (offsetof(struct paca_struct, soft_enabled)));
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)));
+
+ return flags;
+}
+
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we changed paca->irq_soft_mask
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ /*
+ * The irq mask must always include the STD bit if any are set.
+ *
+ * and interrupts don't get replayed until the standard
+ * interrupt (local_irq_disable()) is unmasked.
+ *
+ * Other masks must only provide additional masking beyond
+ * the standard, and they are also not replayed until the
+ * standard interrupt becomes unmasked.
+ *
+ * This could be changed, but it will require partial
+ * unmasks to be replayed, among other things. For now, take
+ * the simple approach.
+ */
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "stb %0,%1(13)"
+ :
+ : "r" (mask),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "lbz %0,%1(13); stb %2,%1(13)"
+ : "=&r" (flags)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
+ : "memory");
return flags;
}
-static inline unsigned long arch_local_irq_disable(void)
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
{
- unsigned long flags, zero;
+ unsigned long flags, tmp;
asm volatile(
- "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
- : "=r" (flags), "=&r" (zero)
- : "i" (offsetof(struct paca_struct, soft_enabled))
+ "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
+ : "=&r" (flags), "=r" (tmp)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
: "memory");
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
+#endif
+
return flags;
}
+static inline unsigned long arch_local_save_flags(void)
+{
+ return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ irq_soft_mask_set(IRQS_DISABLED);
+}
+
extern void arch_local_irq_restore(unsigned long);
static inline void arch_local_irq_enable(void)
{
- arch_local_irq_restore(1);
+ arch_local_irq_restore(IRQS_ENABLED);
}
static inline unsigned long arch_local_irq_save(void)
{
- return arch_local_irq_disable();
+ return irq_soft_mask_set_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- return flags == 0;
+ return flags & IRQS_DISABLED;
}
static inline bool arch_irqs_disabled(void)
@@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irq with PMI, set of
+ * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore()
+ * functions are added. These macros are implemented using generic
+ * linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = irq_soft_mask_or_return(IRQS_DISABLED | \
+ IRQS_PMI_DISABLED); \
+ } while(0)
+
+#define raw_local_irq_pmu_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ arch_local_irq_restore(flags); \
+ } while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ trace_hardirqs_off(); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ if (raw_irqs_disabled_flags(flags)) { \
+ raw_local_irq_pmu_restore(flags); \
+ trace_hardirqs_off(); \
+ } else { \
+ trace_hardirqs_on(); \
+ raw_local_irq_pmu_restore(flags); \
+ } \
+ } while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ raw_local_irq_pmu_restore(flags); \
+ } while (0)
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
#ifdef CONFIG_PPC_BOOK3E
#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory")
#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory")
@@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void)
#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1)
#endif
-#define hard_irq_disable() do { \
- u8 _was_enabled; \
- __hard_irq_disable(); \
- _was_enabled = local_paca->soft_enabled; \
- local_paca->soft_enabled = 0; \
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
- if (_was_enabled) \
- trace_hardirqs_off(); \
+#define hard_irq_disable() do { \
+ unsigned long flags; \
+ __hard_irq_disable(); \
+ flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
+ if (!arch_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
static inline bool lazy_irq_pending(void)
@@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void)
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
extern bool prep_irq_for_idle(void);
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index fad0e6ff460f..d76cb11be3e3 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -35,6 +35,13 @@
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
/*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET 0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET 8
+#define IMC_CNTL_BLK_MODE_OFFSET 32
+
+/*
* Structure to hold memory address information for imc units.
*/
struct imc_mem_info {
@@ -71,7 +78,7 @@ struct imc_events {
struct imc_pmu {
struct pmu pmu;
struct imc_mem_info *mem_info;
- struct imc_events **events;
+ struct imc_events *events;
/*
* Attribute groups for the PMU. Slot 0 used for
* format attribute, slot 1 used for cpusmask attribute,
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index 1aeb5f13b8c4..1a6c1ce17735 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -47,14 +47,14 @@
* be clobbered.
*/
#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACASOFTIRQEN(r13); \
+ lbz __rA,PACAIRQSOFTMASK(r13); \
lbz __rB,PACAIRQHAPPENED(r13); \
- cmpwi cr0,__rA,0; \
- li __rA,0; \
+ andi. __rA,__rA,IRQS_DISABLED; \
+ li __rA,IRQS_DISABLED; \
ori __rB,__rB,PACA_IRQ_HARD_DIS; \
stb __rB,PACAIRQHAPPENED(r13); \
- beq 44f; \
- stb __rA,PACASOFTIRQEN(r13); \
+ bne 44f; \
+ stb __rA,PACAIRQSOFTMASK(r13); \
TRACE_DISABLE_INTS; \
44:
@@ -64,9 +64,9 @@
#define RECONCILE_IRQ_STATE(__rA, __rB) \
lbz __rA,PACAIRQHAPPENED(r13); \
- li __rB,0; \
+ li __rB,IRQS_DISABLED; \
ori __rA,__rA,PACA_IRQ_HARD_DIS; \
- stb __rB,PACASOFTIRQEN(r13); \
+ stb __rB,PACAIRQSOFTMASK(r13); \
stb __rA,PACAIRQHAPPENED(r13)
#endif
#endif
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 4419d435639a..9dcbfa6bbb91 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
master to copy new code to 0 */
extern int crashing_cpu;
extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *);
+extern int crash_wake_offline;
struct kimage;
struct pt_regs;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 941c2a3f231b..9db18287b5f4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void)
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
#endif
}
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index 600a68bd77f5..fdd00939270b 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -2,76 +2,64 @@
#ifndef _ARCH_POWERPC_LOCAL_H
#define _ARCH_POWERPC_LOCAL_H
+#ifdef CONFIG_PPC_BOOK3S_64
+
#include <linux/percpu.h>
#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
typedef struct
{
- atomic_long_t a;
+ long v;
} local_t;
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+#define LOCAL_INIT(i) { (i) }
-#define local_add(i,l) atomic_long_add((i),(&(l)->a))
-#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
-#define local_inc(l) atomic_long_inc(&(l)->a)
-#define local_dec(l) atomic_long_dec(&(l)->a)
-
-static __inline__ long local_add_return(long a, local_t *l)
+static __inline__ long local_read(local_t *l)
{
- long t;
-
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\
- add %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
+ return READ_ONCE(l->v);
}
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
+static __inline__ void local_set(local_t *l, long i)
{
- long t;
+ WRITE_ONCE(l->v, i);
+}
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\
- subf %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
+#define LOCAL_OP(op, c_op) \
+static __inline__ void local_##op(long i, local_t *l) \
+{ \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ l->v c_op i; \
+ powerpc_local_irq_pmu_restore(flags); \
+}
- return t;
+#define LOCAL_OP_RETURN(op, c_op) \
+static __inline__ long local_##op##_return(long a, local_t *l) \
+{ \
+ long t; \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ t = (l->v c_op a); \
+ powerpc_local_irq_pmu_restore(flags); \
+ \
+ return t; \
}
-static __inline__ long local_inc_return(local_t *l)
-{
- long t;
+#define LOCAL_OPS(op, c_op) \
+ LOCAL_OP(op, c_op) \
+ LOCAL_OP_RETURN(op, c_op)
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
- return t;
-}
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+#define local_inc_return(l) local_add_return(1LL, l)
+#define local_inc(l) local_inc_return(l)
/*
* local_inc_and_test - increment and test
@@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
-static __inline__ long local_dec_return(local_t *l)
+#define local_dec_return(l) local_sub_return(1LL, l)
+#define local_dec(l) local_dec_return(l)
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
{
long t;
+ unsigned long flags;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ if (t == o)
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
return t;
}
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+static __inline__ long local_xchg(local_t *l, long n)
+{
+ long t;
+ unsigned long flags;
+
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
+
+ return t;
+}
/**
* local_add_unless - add unless the number is a given value
@@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l)
*/
static __inline__ int local_add_unless(local_t *l, long a, long u)
{
- long t;
-
- __asm__ __volatile__ (
-"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\
- cmpw 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b \n"
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&(l->a.counter)), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t != u;
-}
-
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l) (local_dec_return((l)) == 0)
-
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
- */
-static __inline__ long local_dec_if_positive(local_t *l)
-{
- long t;
+ unsigned long flags;
+ int ret = 0;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\
- cmpwi %0,1\n\
- addi %0,%0,-1\n\
- blt- 2f\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- "\n\
-2:" : "=&b" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ if (l->v != u) {
+ l->v += a;
+ ret = 1;
+ }
+ powerpc_local_irq_pmu_restore(flags);
- return t;
+ return ret;
}
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define __local_inc(l) ((l)->a.counter++)
-#define __local_dec(l) ((l)->a.counter++)
-#define __local_add(i,l) ((l)->a.counter+=(i))
-#define __local_sub(i,l) ((l)->a.counter-=(i))
+#define __local_inc(l) ((l)->v++)
+#define __local_dec(l) ((l)->v++)
+#define __local_add(i,l) ((l)->v+=(i))
+#define __local_sub(i,l) ((l)->v-=(i))
+
+#else /* CONFIG_PPC64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC64 */
#endif /* _ARCH_POWERPC_LOCAL_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cd2fc1cc1cc7..ffe7c71e1132 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -173,11 +173,19 @@ struct machdep_calls {
/* Called after scan and before resource survey */
void (*pcibios_fixup_phb)(struct pci_controller *hose);
+ /*
+ * Called after device has been added to bus and
+ * before sysfs has been created.
+ */
+ void (*pcibios_bus_add_device)(struct pci_dev *pdev);
+
resource_size_t (*pcibios_default_alignment)(void);
#ifdef CONFIG_PCI_IOV
void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+ int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs);
+ int (*pcibios_sriov_disable)(struct pci_dev *pdev);
#endif /* CONFIG_PCI_IOV */
/* Called to shutdown machine specific hardware not already controlled
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 30922f699341..07e3f54de9e3 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
/*
@@ -22,13 +23,23 @@
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
- return (prot & PROT_SAO) ? VM_SAO : 0;
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+ return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (vm_flags & VM_SAO) ?
+ __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+ __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5bb3dbede41a..2f806e329648 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -29,17 +29,25 @@
#define MI_Kp 0x40000000 /* Should always be set */
/*
- * All pages' PP exec bits are set to 000, which means Execute for Supervisor
- * and no Execute for User.
- * Then we use the APG to say whether accesses are according to Page rules,
- * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone)
- * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER
- * 0 (00) => Not User, no exec => 11 (all accesses performed as user)
- * 1 (01) => User but no exec => 11 (all accesses performed as user)
- * 2 (10) => Not User, exec => 01 (rights according to page definition)
- * 3 (11) => User, exec => 00 (all accesses performed as supervisor)
- */
-#define MI_APG_INIT 0xf4ffffff
+ * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
+ * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
+ * respectively NA for All or X for Supervisor and no access for User.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
+ * When that bit is not set access is done iaw "all user"
+ * which means no access iaw page rules.
+ * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * 0x => No access => 11 (all accesses performed as user iaw page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * We define all 16 groups so that all other bits of APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MI_APG_INIT 0xf4f4f4f4
+#else
+#define MI_APG_INIT 0x44444444
+#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -102,17 +110,25 @@
#define MD_Kp 0x40000000 /* Should always be set */
/*
- * All pages' PP data bits are set to either 000 or 011, which means
+ * All pages' PP data bits are set to either 000 or 011 or 001, which means
* respectively RW for Supervisor and no access for User, or RO for
- * Supervisor and no access for user.
+ * Supervisor and no access for user and NA for ALL.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
- * Therefore, we define 2 APG groups. lsb is _PAGE_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
- * 1 => User => 00 (all accesses performed as supervisor
- * according to page definition)
- */
-#define MD_APG_INIT 0x4fffffff
+ * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
+ * When that bit is not set access is done iaw "all user"
+ * which means no access iaw page rules.
+ * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
+ * 0x => No access => 11 (all accesses performed as user iaw page definition)
+ * 10 => No user => 01 (all accesses performed according to page definition)
+ * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * We define all 16 groups so that all other bits of APG can take any value
+ */
+#ifdef CONFIG_SWAP
+#define MD_APG_INIT 0xf4f4f4f4
+#else
+#define MD_APG_INIT 0x44444444
+#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -164,6 +180,12 @@
*/
#define SPRN_M_TW 799
+/* APGs */
+#define M_APG0 0x00000000
+#define M_APG1 0x00000020
+#define M_APG2 0x00000040
+#define M_APG3 0x00000060
+
#ifndef __ASSEMBLY__
typedef struct {
unsigned int id;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 6364f5c2cc3e..bb38312cff28 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void)
}
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
+#else
+static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index e2a2b8400490..051b3d63afe3 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -187,11 +187,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign);
+#else /* CONFIG_PPC_MEM_KEYS */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
+
+#define pkey_mm_init(mm)
+#define thread_pkey_regs_save(thread)
+#define thread_pkey_regs_restore(new_thread, old_thread)
+#define thread_pkey_regs_init(thread)
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return 0x0UL;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
index 0e23cd4ac8aa..13e6702ec458 100644
--- a/arch/powerpc/include/asm/mpic_timer.h
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -29,17 +29,17 @@ struct mpic_timer {
#ifdef CONFIG_MPIC_TIMER
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time);
+ time64_t time);
void mpic_start_timer(struct mpic_timer *handle);
void mpic_stop_timer(struct mpic_timer *handle);
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
void mpic_free_timer(struct mpic_timer *handle);
#else
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time) { return NULL; }
+ time64_t time) { return NULL; }
void mpic_start_timer(struct mpic_timer *handle) { }
void mpic_stop_timer(struct mpic_timer *handle) { }
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
void mpic_free_timer(struct mpic_timer *handle) { }
#endif
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index e97f58689ca7..9c80939b4d14 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -4,10 +4,6 @@
#ifdef CONFIG_PPC_WATCHDOG
extern void arch_touch_nmi_watchdog(void);
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
- bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-
#else
static inline void arch_touch_nmi_watchdog(void) {}
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index d072139ff2e5..29d37bd1f3b3 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -61,7 +61,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pte_page)
{
- *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+ *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER |
+ _PMD_PRESENT);
}
#define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index cc2bfec3aa3b..504a3c36ce5c 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+ unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
pte_update(ptep, clr, set);
}
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 6dc0180fd5c7..f04cb46ae8a1 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -31,37 +31,34 @@
/* Definitions for 8xx embedded chips. */
#define _PAGE_PRESENT 0x0001 /* Page is valid */
#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
-#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
-#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
+#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */
+#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
#define _PAGE_DIRTY 0x0100 /* C: page changed */
/* These 4 software bits must be masked out when the L2 entry is loaded
* into the TLB.
*/
#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */
-#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */
-#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */
-#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */
-#define _PAGE_ACCESSED 0x0800 /* software: page referenced */
+#define _PAGE_SPECIAL 0x0020 /* SW entry */
+#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */
+#define _PAGE_ACCESSED 0x0080 /* software: page referenced */
+#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
#define _PMD_PRESENT 0x0001
-#define _PMD_BAD 0x0ff0
+#define _PMD_BAD 0x0fd0
#define _PMD_PAGE_MASK 0x000c
#define _PMD_PAGE_8M 0x000c
#define _PMD_PAGE_512K 0x0004
+#define _PMD_USER 0x0020 /* APG 1 */
/* Until my rework is finished, 8xx still needs atomic PTE updates */
#define PTE_ATOMIC_UPDATES 1
-/* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO)
-#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
-#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE)
-#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE | _PAGE_EXEC)
+#ifdef CONFIG_PPC_16K_PAGES
+#define _PAGE_PSIZE _PAGE_HUGE
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 5c68f4a59f75..c56de1e8026f 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -45,6 +45,29 @@ static inline int pte_present(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
}
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ unsigned long pteval = pte_val(pte);
+ /*
+ * A read-only access is controlled by _PAGE_USER bit.
+ * We have _PAGE_READ set for WRITE and EXECUTE
+ */
+ unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_WRITE;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return false;
+
+ return true;
+}
+
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*
@@ -103,7 +126,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
static inline pte_t pte_mkhuge(pte_t pte)
{
- return pte;
+ return __pte(pte_val(pte) | _PAGE_HUGE);
}
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -212,8 +235,10 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT))
+#if _PAGE_WRITETHRU != 0
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT | _PAGE_WRITETHRU))
+#endif
#define pgprot_cached_noncoherent(prot) \
(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index 2da4532ca377..ccee8eb509bb 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -55,6 +55,7 @@
#define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
#define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX)
#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+#define _PAGE_PRIVILEGED (_PAGE_BAP_SR)
#define _PAGE_HASHPTE 0
#define _PAGE_BUSY 0
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..24c73f5575ee 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,10 @@
#define OPAL_SET_POWER_SHIFT_RATIO 155
#define OPAL_SENSOR_GROUP_CLEAR 156
#define OPAL_PCI_SET_P2P 157
-#define OPAL_LAST 157
+#define OPAL_NPU_SPA_SETUP 159
+#define OPAL_NPU_SPA_CLEAR_CACHE 160
+#define OPAL_NPU_TL_SET 161
+#define OPAL_LAST 161
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..12e70fb58700 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -34,6 +34,12 @@ int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
uint64_t bdf);
int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
uint64_t lpcr);
+int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
+ uint64_t addr, uint64_t PE_mask);
+int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
+ uint64_t PE_handle);
+int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
+ uint64_t rate_phys, uint32_t size);
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 23ac7fc0af23..b62c31037cad 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -159,7 +159,7 @@ struct paca_struct {
u64 saved_r1; /* r1 save for RTAS calls or PM */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
- u8 soft_enabled; /* irq soft-enable flag */
+ u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
@@ -239,8 +239,7 @@ struct paca_struct {
*/
u64 exrfi[EX_SIZE] __aligned(0x80);
void *rfi_flush_fallback_area;
- u64 l1d_flush_congruence;
- u64 l1d_flush_sets;
+ u64 l1d_flush_size;
#endif
};
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 62ed83db04ae..94d449031b18 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -197,25 +197,22 @@ struct pci_dn {
struct iommu_table_group *table_group; /* for phb's or bridges */
int pci_ext_config_space; /* for pci devices */
-
- struct pci_dev *pcidev; /* back-pointer to the pci device */
#ifdef CONFIG_EEH
struct eeh_dev *edev; /* eeh device */
#endif
#define IODA_INVALID_PE 0xFFFFFFFF
-#ifdef CONFIG_PPC_POWERNV
unsigned int pe_number;
- int vf_index; /* VF index in the PF */
#ifdef CONFIG_PCI_IOV
+ int vf_index; /* VF index in the PF */
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
u16 num_vfs; /* number of VFs enabled*/
unsigned int *pe_num_map; /* PE# for the first VF PE or array */
bool m64_single_mode; /* Use M64 BAR in Single Mode */
#define IODA_INVALID_M64 (-1)
- int (*m64_map)[PCI_SRIOV_NUM_BARS];
+ int (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */
+ int last_allow_rc; /* Only used on pseries */
#endif /* CONFIG_PCI_IOV */
int mps; /* Maximum Payload Size */
-#endif
struct list_head child_list;
struct list_head list;
struct resource holes[PCI_SRIOV_NUM_BARS];
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 8dc32eacc97c..d82802ff5088 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb);
extern struct pci_dev *of_create_pci_dev(struct device_node *node,
struct pci_bus *bus, int devfn);
+extern unsigned int pci_parse_of_flags(u32 addr0, int bridge);
+
extern void of_scan_pci_bridge(struct pci_dev *dev);
extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644
index 000000000000..0409c80c32c0
--- /dev/null
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+DECLARE_STATIC_KEY_TRUE(pkey_disabled);
+extern int pkeys_total; /* total pkeys as per device tree */
+extern u32 initial_allocation_mask; /* bits set for reserved keys */
+
+/*
+ * Define these here temporarily so we're not dependent on patching linux/mm.h.
+ * Once it's updated we can drop these.
+ */
+#ifndef VM_PKEY_BIT0
+# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
+# define VM_PKEY_BIT0 VM_HIGH_ARCH_0
+# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
+# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
+# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
+# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
+#endif
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+ VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \
+ PKEY_DISABLE_WRITE | \
+ PKEY_DISABLE_EXECUTE)
+
+static inline u64 pkey_to_vmflag_bits(u16 pkey)
+{
+ return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
+}
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0x0UL;
+
+ return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL));
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+#define arch_max_pkey() pkeys_total
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL));
+}
+
+#define pkey_alloc_mask(pkey) (0x1 << pkey)
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) { \
+ mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) { \
+ mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_is_allocated(mm, pkey) \
+ (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \
+ pkey_alloc_mask(pkey))
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /* A reserved key is never considered as 'explicitly allocated' */
+ return ((pkey < arch_max_pkey()) &&
+ !__mm_pkey_is_reserved(pkey) &&
+ __mm_pkey_is_allocated(mm, pkey));
+}
+
+extern void __arch_activate_pkey(int pkey);
+extern void __arch_deactivate_pkey(int pkey);
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure that the pkey is
+ * valid as far as the hardware is concerned. The rest of the kernel
+ * trusts that only good, valid pkeys come out of here.
+ */
+ u32 all_pkeys_mask = (u32)(~(0x0));
+ int ret;
+
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially because ffz()
+ * behavior is undefined if there are no zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz((u32)mm_pkey_allocation_map(mm));
+ __mm_pkey_allocated(mm, ret);
+
+ /*
+ * Enable the key in the hardware
+ */
+ if (ret > 0)
+ __arch_activate_pkey(ret);
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ /*
+ * Disable the key in the hardware
+ */
+ __arch_deactivate_pkey(pkey);
+ __mm_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int __execute_only_pkey(struct mm_struct *mm);
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ return __execute_only_pkey(mm);
+}
+
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey);
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * Is this an mprotect_pkey() call? If so, never override the value that
+ * came from the user.
+ */
+ if (pkey != -1)
+ return pkey;
+
+ return __arch_override_mprotect_pkey(vma, prot, pkey);
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -EINVAL;
+ return __arch_set_user_pkey_access(tsk, pkey, init_val);
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return !static_branch_likely(&pkey_disabled);
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+extern bool arch_supports_pkeys(int cap);
+extern unsigned int arch_usable_pkeys(void);
+extern void thread_pkey_regs_save(struct thread_struct *thread);
+extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread);
+extern void thread_pkey_regs_init(struct thread_struct *thread);
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
new file mode 100644
index 000000000000..f6945d3bc971
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _ASM_PNV_OCXL_H
+#define _ASM_PNV_OCXL_H
+
+#include <linux/pci.h>
+
+#define PNV_OCXL_TL_MAX_TEMPLATE 63
+#define PNV_OCXL_TL_BITS_PER_RATE 4
+#define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
+
+extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported);
+extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+
+extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size);
+extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size);
+
+extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle);
+extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle);
+
+extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+ void **platform_data);
+extern void pnv_ocxl_spa_release(void *platform_data);
+extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle);
+
+extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
+extern void pnv_ocxl_free_xive_irq(u32 irq);
+
+#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ce0930d68857..ab5c1588b487 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -236,6 +236,7 @@
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
+#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
@@ -383,6 +384,7 @@
#define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21)
+#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bdab3b74eb98..01299cdc9806 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -309,6 +309,11 @@ struct thread_struct {
struct thread_vr_state ckvr_state; /* Checkpointed VR state */
unsigned long ckvrsave; /* Checkpointed VRSAVE */
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_MEM_KEYS
+ unsigned long amr;
+ unsigned long iamr;
+ unsigned long uamor;
+#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 825bd5998701..b04c5ce8191b 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -80,21 +80,20 @@ extern void of_instantiate_rtc(void);
extern int of_get_ibm_chip_id(struct device_node *np);
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
+struct of_drc_info {
+ char *drc_type;
+ char *drc_name_prefix;
+ u32 drc_index_start;
+ u32 drc_name_suffix_start;
+ u32 num_sequential_elems;
+ u32 sequential_inc;
+ u32 drc_power_domain;
+ u32 last_drc_index;
};
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
+extern int of_read_drc_info_cell(struct property **prop,
+ const __be32 **curval, struct of_drc_info *data);
+
/*
* There are two methods for telling firmware what our capabilities are.
@@ -159,6 +158,7 @@ struct of_drconf_cell {
#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2 0x1680 /* ibm,dynamic-reconfiguration-v2 */
#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */
#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */
#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */
@@ -175,6 +175,7 @@ struct of_drconf_cell {
#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */
/* Radix Table Extensions */
#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index ce142ef99ba7..c4a72c7a8c83 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -8,9 +8,6 @@
#ifndef _PAGE_HASHPTE
#define _PAGE_HASHPTE 0
#endif
-#ifndef _PAGE_SHARED
-#define _PAGE_SHARED 0
-#endif
#ifndef _PAGE_HWWRITE
#define _PAGE_HWWRITE 0
#endif
@@ -45,6 +42,20 @@
#ifndef _PAGE_PTE
#define _PAGE_PTE 0
#endif
+/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
+#ifndef _PAGE_PRIVILEGED
+#define _PAGE_PRIVILEGED 0
+#else
+#ifndef _PAGE_USER
+#define _PAGE_USER 0
+#endif
+#endif
+#ifndef _PAGE_NA
+#define _PAGE_NA 0
+#endif
+#ifndef _PAGE_HUGE
+#define _PAGE_HUGE 0
+#endif
#ifndef _PMD_PRESENT_MASK
#define _PMD_PRESENT_MASK _PMD_PRESENT
@@ -53,17 +64,22 @@
#define _PMD_SIZE 0
#define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
#endif
+#ifndef _PMD_USER
+#define _PMD_USER 0
+#endif
#ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO (_PAGE_RO)
+#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO)
#endif
#ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO)
+#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
#endif
#ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+ _PAGE_HWWRITE)
#endif
#ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
+ _PAGE_HWWRITE | _PAGE_EXEC)
#endif
#ifndef _PAGE_HPTEFLAGS
#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
@@ -85,7 +101,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
*/
static inline bool pte_user(pte_t pte)
{
- return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+ return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
}
#endif /* __ASSEMBLY__ */
@@ -115,7 +131,8 @@ static inline bool pte_user(pte_t pte)
/* Mask of bits returned by pte_pgprot() */
#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
_PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
- _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \
+ _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
+ _PAGE_PRIVILEGED | \
_PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
/*
@@ -142,7 +159,7 @@ static inline bool pte_user(pte_t pte)
*
* Note due to the way vm flags are laid out, the bits are XWR
*/
-#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
_PAGE_EXEC)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..e6c7eadf6bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -312,7 +312,6 @@
DSISR_BAD_EXT_CTRL)
#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \
DSISR_ATTR_CONFLICT | \
- DSISR_KEYFAULT | \
DSISR_UNSUPP_MMU | \
DSISR_PRTABLE_FAULT | \
DSISR_ICSWX_NO_CT | \
@@ -432,8 +431,9 @@
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
-#define SPRN_HMER 0x150 /* Hardware m? error recovery */
-#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
+#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
+#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
+#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 53a7e2955d3e..7192eece6c3e 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -66,86 +66,4 @@
#define DC_DFWT 0x40000000 /* Data cache is forced write through */
#define DC_LES 0x20000000 /* Caches are little endian mode */
-#ifdef CONFIG_8xx_CPU6
-#define do_mtspr_cpu6(rn, rn_addr, v) \
- do { \
- int _reg_cpu6 = rn_addr, _tmp_cpu6; \
- asm volatile("stw %0, %1;" \
- "lwz %0, %1;" \
- "mtspr " __stringify(rn) ",%2" : \
- : "r" (_reg_cpu6), "m"(_tmp_cpu6), \
- "r" ((unsigned long)(v)) \
- : "memory"); \
- } while (0)
-
-#define do_mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \
- : "r" ((unsigned long)(v)) \
- : "memory")
-#define mtspr(rn, v) \
- do { \
- if (rn == SPRN_IMMR) \
- do_mtspr_cpu6(rn, 0x3d30, v); \
- else if (rn == SPRN_IC_CST) \
- do_mtspr_cpu6(rn, 0x2110, v); \
- else if (rn == SPRN_IC_ADR) \
- do_mtspr_cpu6(rn, 0x2310, v); \
- else if (rn == SPRN_IC_DAT) \
- do_mtspr_cpu6(rn, 0x2510, v); \
- else if (rn == SPRN_DC_CST) \
- do_mtspr_cpu6(rn, 0x3110, v); \
- else if (rn == SPRN_DC_ADR) \
- do_mtspr_cpu6(rn, 0x3310, v); \
- else if (rn == SPRN_DC_DAT) \
- do_mtspr_cpu6(rn, 0x3510, v); \
- else if (rn == SPRN_MI_CTR) \
- do_mtspr_cpu6(rn, 0x2180, v); \
- else if (rn == SPRN_MI_AP) \
- do_mtspr_cpu6(rn, 0x2580, v); \
- else if (rn == SPRN_MI_EPN) \
- do_mtspr_cpu6(rn, 0x2780, v); \
- else if (rn == SPRN_MI_TWC) \
- do_mtspr_cpu6(rn, 0x2b80, v); \
- else if (rn == SPRN_MI_RPN) \
- do_mtspr_cpu6(rn, 0x2d80, v); \
- else if (rn == SPRN_MI_CAM) \
- do_mtspr_cpu6(rn, 0x2190, v); \
- else if (rn == SPRN_MI_RAM0) \
- do_mtspr_cpu6(rn, 0x2390, v); \
- else if (rn == SPRN_MI_RAM1) \
- do_mtspr_cpu6(rn, 0x2590, v); \
- else if (rn == SPRN_MD_CTR) \
- do_mtspr_cpu6(rn, 0x3180, v); \
- else if (rn == SPRN_M_CASID) \
- do_mtspr_cpu6(rn, 0x3380, v); \
- else if (rn == SPRN_MD_AP) \
- do_mtspr_cpu6(rn, 0x3580, v); \
- else if (rn == SPRN_MD_EPN) \
- do_mtspr_cpu6(rn, 0x3780, v); \
- else if (rn == SPRN_M_TWB) \
- do_mtspr_cpu6(rn, 0x3980, v); \
- else if (rn == SPRN_MD_TWC) \
- do_mtspr_cpu6(rn, 0x3b80, v); \
- else if (rn == SPRN_MD_RPN) \
- do_mtspr_cpu6(rn, 0x3d80, v); \
- else if (rn == SPRN_M_TW) \
- do_mtspr_cpu6(rn, 0x3f80, v); \
- else if (rn == SPRN_MD_CAM) \
- do_mtspr_cpu6(rn, 0x3190, v); \
- else if (rn == SPRN_MD_RAM0) \
- do_mtspr_cpu6(rn, 0x3390, v); \
- else if (rn == SPRN_MD_RAM1) \
- do_mtspr_cpu6(rn, 0x3590, v); \
- else if (rn == SPRN_DEC) \
- do_mtspr_cpu6(rn, 0x2c00, v); \
- else if (rn == SPRN_TBWL) \
- do_mtspr_cpu6(rn, 0x3880, v); \
- else if (rn == SPRN_TBWU) \
- do_mtspr_cpu6(rn, 0x3a80, v); \
- else if (rn == SPRN_DPDR) \
- do_mtspr_cpu6(rn, 0x2d30, v); \
- else \
- do_mtspr(rn, v); \
- } while (0)
-#endif
-
#endif /* _ASM_POWERPC_REG_8xx_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 449912f057f6..d61f9c96d916 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
SYSCALL(kexec_file_load)
SYSCALL(statx)
+SYSCALL(pkey_alloc)
+SYSCALL(pkey_free)
+SYSCALL(pkey_mprotect)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 9ba11dbcaca9..daf1ba97a00c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,14 +12,10 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 384
+#define NR_syscalls 387
#define __NR__exit __NR_exit
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
#define _ASM_POWERPC_XIVE_REGS_H
/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+
+/*
* Thread Management (aka "TM") registers
*/
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..7624e22f5045 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_EOI_FW 0x10
#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
+/* Special flag set by KVM for excalation interrupts */
+#define XIVE_IRQ_NO_EOI 0x80
+
#define XIVE_INVALID_CHIP_ID -1
/* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
atomic_t pending_count;
};
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI 0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
-#define XIVE_ESB_GET 0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P 0x2
-#define XIVE_ESB_VAL_Q 0x1
-
/* Global enable flags for the XIVE support */
extern bool __xive_enabled;
@@ -154,7 +122,7 @@ static inline bool xive_enabled(void) { return false; }
static inline bool xive_spapr_init(void) { return false; }
static inline bool xive_native_init(void) { return false; }
static inline void xive_smp_probe(void) { }
-extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
static inline void xive_smp_setup_cpu(void) { }
static inline void xive_smp_disable_cpu(void) { }
static inline void xive_kexec_teardown_cpu(int secondary) { }
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 5f201d40bcca..860c59291bfc 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -97,6 +97,7 @@
#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */
#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */
#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY 3 /* includes amr, iamr, uamor */
typedef unsigned long elf_greg_t64;
typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index e63bc37e33af..65065ce32814 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -30,4 +30,10 @@
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_EXECUTE)
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index df8684f31919..389c36fd8299 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -395,5 +395,8 @@
#define __NR_pwritev2 381
#define __NR_kexec_file_load 382
#define __NR_statx 383
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+#define __NR_pkey_mprotect 386
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f390d57cf2e1..88b84ac76b53 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -178,7 +178,7 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
- OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled);
+ OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
#ifdef CONFIG_PPC_BOOK3S
OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
@@ -239,8 +239,7 @@ int main(void)
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
OFFSET(PACA_EXRFI, paca_struct, exrfi);
- OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
- OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+ OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
@@ -401,6 +400,8 @@ int main(void)
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 679bbe714e85..3f30c994e931 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -111,7 +107,6 @@ _GLOBAL(__setup_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -136,7 +131,6 @@ _GLOBAL(__restore_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -194,50 +188,6 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-/*
- * Clear the TLB using the specified IS form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- */
-__init_tlb_power7:
- li r6,POWER7_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-__init_tlb_power8:
- li r6,POWER8_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-/*
- * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
- * and one for partition scope to clear process and partition table entries.
- */
-__init_tlb_power9:
- li r6,POWER9_TLB_SETS_HASH - 1
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- li r8,0
- ptesync
- PPC_TLBIEL(7, 8, 2, 1, 0)
- PPC_TLBIEL(7, 8, 2, 0, 0)
-2: addi r7,r7,0x1000
- PPC_TLBIEL(7, 8, 0, 0, 0)
- bdnz 2b
- ptesync
-1: blr
-
__init_PMU_HV:
li r5,0
mtspr SPRN_MMCRC,r5
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1350f49d81a8..c40a9fc1e5d1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
-extern void __flush_tlb_power7(unsigned int action);
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.platform = "power9",
},
{ /* Power7 */
@@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
@@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index cbabb5adccd9..00b215125d3e 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -44,6 +44,14 @@
#define REAL_MODE_TIMEOUT 10000
static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
@@ -63,15 +71,12 @@ static int handle_fault(struct pt_regs *regs)
#ifdef CONFIG_SMP
static atomic_t cpus_in_crash;
-static void crash_ipi_callback(struct pt_regs *regs)
+void crash_ipi_callback(struct pt_regs *regs)
{
static cpumask_t cpus_state_saved = CPU_MASK_NONE;
int cpu = smp_processor_id();
- if (!cpu_online(cpu))
- return;
-
hard_irq_disable();
if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
crash_save_cpu(regs, cpu);
@@ -109,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
printk(KERN_EMERG "Sending IPI to other CPUs\n");
+ if (crash_wake_offline)
+ ncpus = num_present_cpus() - 1;
+
crash_send_ipi(crash_ipi_callback);
smp_wmb();
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8bdc2f96c5d6..945e2c29ad2d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -77,8 +77,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -92,27 +90,6 @@ static struct {
static void (*init_pmu_registers)(void);
-static void cpufeatures_flush_tlb(void)
-{
- /*
- * This is a temporary measure to keep equivalent TLB flush as the
- * cputable based setup code.
- */
- switch (PVR_VER(mfspr(SPRN_PVR))) {
- case PVR_POWER8:
- case PVR_POWER8E:
- case PVR_POWER8NVL:
- __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
- break;
- case PVR_POWER9:
- __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
- break;
- default:
- pr_err("unknown CPU version for boot TLB flush\n");
- break;
- }
-}
-
static void __restore_cpu_cpufeatures(void)
{
/*
@@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void)
if (init_pmu_registers)
init_pmu_registers();
-
- cpufeatures_flush_tlb();
}
static char dt_cpu_name[64];
@@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = NULL,
.cpu_restore = __restore_cpu_cpufeatures,
- .flush_tlb = NULL,
.machine_check_early = NULL,
.platform = NULL,
};
@@ -412,7 +386,6 @@ static void init_pmu_power8(void)
static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power8";
- cur_cpu_spec->flush_tlb = __flush_tlb_power8;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
return 1;
@@ -451,7 +424,6 @@ static void init_pmu_power9(void)
static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power9";
- cur_cpu_spec->flush_tlb = __flush_tlb_power9;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
return 1;
@@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void)
system_registers.hfscr = mfspr(SPRN_HFSCR);
system_registers.fscr = mfspr(SPRN_FSCR);
- cpufeatures_flush_tlb();
-
pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index cbca0a667682..cc649809885e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
return NULL;
}
+int eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable report various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
/**
* pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 4f71e4c9beb7..beea2182d754 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata)
edev->in_error = true;
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
return NULL;
}
@@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata)
driver->err_handler->resume(dev);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+ eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+#endif
return NULL;
}
@@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata)
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
return NULL;
}
@@ -440,7 +446,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
return NULL;
}
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
#endif
return NULL;
@@ -496,7 +502,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
(*removed)++;
if (edev->physfn) {
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 797549289798..deed906dd8f1 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev, \
\
return sprintf(buf, _format "\n", edev->_memb); \
} \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
@@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
+#ifdef CONFIG_PCI_IOV
+static ssize_t eeh_notify_resume_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ pdn = pci_get_pdn(pdev);
+ return sprintf(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ if (!edev || !edev->pe || !eeh_ops->notify_resume)
+ return -ENODEV;
+
+ if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+ return -EIO;
+
+ return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+ struct device_node *np;
+ int rc = 0;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+ return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+ struct device_node *np;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV */
+
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ rc += eeh_notify_resume_add(pdev);
if (rc)
pr_warn("EEH: Unable to create sysfs entries\n");
@@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ eeh_notify_resume_remove(pdev);
+
if (edev)
edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e780e1fbf6c2..eb8d01bae8c6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r9
@@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r7
@@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11)
mtlr r10
REST_GPR(10, r11)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR1,r9
@@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r12
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2748584b767d..2cb5109a7ea3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -133,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
* of irq tracing is used, we additionally check that condition
* is correct
*/
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
- lbz r10,PACASOFTIRQEN(r13)
- xori r10,r10,1
-1: tdnei r10,0
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ lbz r10,PACAIRQSOFTMASK(r13)
+1: tdnei r10,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
@@ -152,7 +151,7 @@ system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
- li r10,1
+ li r10,IRQS_ENABLED
std r10,SOFTE(r1)
CURRENT_THREAD_INFO(r11, r1)
@@ -755,10 +754,10 @@ resume_kernel:
beq+ restore
/* Check that preempt_count() == 0 and interrupts are enabled */
lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
+ cmpwi cr0,r8,0
+ bne restore
ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
+ andi. r0,r0,IRQS_DISABLED
bne restore
/*
@@ -796,12 +795,12 @@ restore:
* are about to re-enable interrupts
*/
ld r5,SOFTE(r1)
- lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr0,r5,0
- beq .Lrestore_irq_off
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
- cmpwi cr0,r6,1
+ andi. r6,r6,IRQS_DISABLED
beq cr0,.Ldo_restore
/*
@@ -820,8 +819,8 @@ restore:
*/
.Lrestore_no_replay:
TRACE_ENABLE_INTS
- li r0,1
- stb r0,PACASOFTIRQEN(r13);
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/*
* Final return path. BookE is handled in a different file
@@ -939,9 +938,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 1f
rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
stb r7,PACAIRQHAPPENED(r13)
-1: li r0,0
- stb r0,PACASOFTIRQEN(r13);
- TRACE_DISABLE_INTS
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
b .Ldo_restore
/*
@@ -979,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
addi r3,r1,STACK_FRAME_OVERHEAD;
bl do_IRQ
b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
1: cmpwi cr0,r3,0xe60
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
@@ -1055,15 +1063,15 @@ _GLOBAL(enter_rtas)
li r0,0
mtcr r0
-#ifdef CONFIG_BUG
+#ifdef CONFIG_BUG
/* There is no way it is acceptable to get here with interrupts enabled,
* check it with the asm equivalent of WARN_ON
*/
- lbz r0,PACASOFTIRQEN(r13)
-1: tdnei r0,0
+ lbz r0,PACAIRQSOFTMASK(r13)
+1: tdeqi r0,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
-
+
/* Hard-disable interrupts */
mfmsr r6
rldicl r7,r6,48,1
@@ -1107,6 +1115,17 @@ __enter_rtas:
rtas_return_loc:
FIXUP_ENDIAN
+ /*
+ * Clear RI and set SF before anything.
+ */
+ mfmsr r6
+ li r0,MSR_RI
+ andc r6,r6,r0
+ sldi r0,r0,(MSR_SF_LG - MSR_RI_LG)
+ or r6,r6,r0
+ sync
+ mtmsrd r6
+
/* relocation is off at this point */
GET_PACA(r4)
clrldi r4,r4,2 /* convert to realmode address */
@@ -1115,12 +1134,6 @@ rtas_return_loc:
0: mflr r3
ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
- mfmsr r6
- li r0,MSR_RI
- andc r6,r6,r0
- sync
- mtmsrd r6
-
ld r1,PACAR1(r4) /* Restore our SP */
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index acd8ca76233e..ee832d344a5a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACASOFTIRQEN(r13)
+ lbz r10,PACAIRQSOFTMASK(r13)
SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
@@ -206,17 +206,17 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACASOFTIRQEN(r13)
+ lbz r6,PACAIRQSOFTMASK(r13)
ld r5,SOFTE(r1)
/* Interrupts had better not already be enabled... */
- twnei r6,0
+ tweqi r6,IRQS_ENABLED
- cmpwi cr0,r5,0
- beq 1f
+ andi. r6,r5,IRQS_DISABLED
+ bne 1f
TRACE_ENABLE_INTS
- stb r5,PACASOFTIRQEN(r13)
+ stb r5,PACAIRQSOFTMASK(r13)
1:
/*
* Restore PACAIRQHAPPENED rather than setting it based on
@@ -351,9 +351,9 @@ ret_from_mc_except:
#define PROLOG_ADDITION_NONE_MC(n)
#define PROLOG_ADDITION_MASKABLE_GEN(n) \
- lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
- cmpwi cr0,r10,0; /* yes -> go out of line */ \
- beq masked_interrupt_book3e_##n
+ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \
+ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
+ bne masked_interrupt_book3e_##n
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
@@ -397,7 +397,7 @@ exc_##n##_common: \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
- lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
ld r12,exception_marker@toc(r2); \
li r0,0; \
std r3,GPR10(r1); /* save r10 to stackframe */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2dc10bf646b8..243d072a225a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -718,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
@@ -729,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
.globl hardware_interrupt_relon_hv;
hardware_interrupt_relon_hv:
BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
@@ -827,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -839,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980)
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xa00)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -1052,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
@@ -1110,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common)
EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1120,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1132,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
+EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
+EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xf00)
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
@@ -1345,7 +1351,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
@@ -1455,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
rfid
@@ -1497,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
hrfid
@@ -1632,7 +1634,7 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+ lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1828,6 +1830,8 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE
beq hardware_interrupt_common
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
+ cmpwi r3,0xf00
+ beq performance_monitor_common
BEGIN_FTR_SECTION
cmpwi r3,0xa00
beq h_doorbell_common_msgclr
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index aa71a90f5222..a61151a6ea5e 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start)
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -822,7 +822,8 @@ __secondary_start:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- stb r7,PACASOFTIRQEN(r13)
+ li r7,IRQS_DISABLED
+ stb r7,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -988,8 +989,8 @@ start_here_common:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4fee00d414e8..d8670a37d70c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,23 +33,6 @@
#include <asm/fixmap.h>
#include <asm/export.h>
-/* Macro to make the code more readable. */
-#ifdef CONFIG_8xx_CPU6
-#define SPRN_MI_TWC_ADDR 0x2b80
-#define SPRN_MI_RPN_ADDR 0x2d80
-#define SPRN_MD_TWC_ADDR 0x3b80
-#define SPRN_MD_RPN_ADDR 0x3d80
-
-#define MTSPR_CPU6(spr, reg, treg) \
- li treg, spr##_ADDR; \
- stw treg, 12(r0); \
- lwz treg, 12(r0); \
- mtspr spr, reg
-#else
-#define MTSPR_CPU6(spr, reg, treg) \
- mtspr spr, reg
-#endif
-
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
#define SIMPLE_KERNEL_ADDRESS 1
@@ -69,11 +52,7 @@
* Value for the bits that have fixed value in RPN entries.
* Also used for tagging DAR for DTLBerror.
*/
-#ifdef CONFIG_PPC_16K_PAGES
-#define RPN_PATTERN (0x00f0 | MD_SPS16K)
-#else
#define RPN_PATTERN 0x00f0
-#endif
#define PAGE_SHIFT_512K 19
#define PAGE_SHIFT_8M 23
@@ -134,15 +113,12 @@ turn_on_mmu:
* task's thread_struct.
*/
#define EXCEPTION_PROLOG \
- EXCEPTION_PROLOG_0; \
+ mtspr SPRN_SPRG_SCRATCH0, r10; \
+ mtspr SPRN_SPRG_SCRATCH1, r11; \
mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
-#define EXCEPTION_PROLOG_0 \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11
-
#define EXCEPTION_PROLOG_1 \
mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
andi. r11,r11,MSR_PR; \
@@ -177,13 +153,6 @@ turn_on_mmu:
SAVE_2GPRS(7, r11)
/*
- * Exception exit code.
- */
-#define EXCEPTION_EPILOG_0 \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- mfspr r11,SPRN_SPRG_SCRATCH1
-
-/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
*
@@ -326,15 +295,10 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtspr SPRN_SPRG_SCRATCH2, r3
-#endif
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mtspr SPRN_SPRG_SCRATCH2, r12
#endif
/* If we are faulting a kernel address, we have to use the
@@ -345,7 +309,7 @@ InstructionTLBMiss:
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfcr r3
+ mfcr r12
#endif
#ifdef ITLB_MISS_KERNEL
#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
@@ -388,40 +352,46 @@ _ENTRY(ITLBMiss_cmp)
lwz r10, 0(r10) /* Get the pte */
4:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtcr r3
+ mtcr r12
#endif
- /* Insert the APG into the TWC from the Linux PTE. */
- rlwimi r11, r10, 0, 25, 26
- /* Load the MI_TWC with the attributes for this "segment." */
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MI_SPS16K
-#endif
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
- li r11, RPN_PATTERN
+ /* Load the MI_TWC with the attributes for this "segment." */
+ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+
+ li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 20-23 and 28 must be clear.
- * Software indicator bits 24, 25, 26, and 27 must be
+ * Software indicator bits 20 and 23 must be clear.
+ * Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */
-#else
- rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */
-#endif
- MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
+ rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
+ rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfspr r3, SPRN_SPRG_SCRATCH2
+_ENTRY(itlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
+#endif
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(itlb_miss_perf)
+ lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
#endif
- EXCEPTION_EPILOG_0
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -436,7 +406,6 @@ _ENTRY(ITLBMiss_cmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -445,21 +414,15 @@ _ENTRY(ITLBMiss_cmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
. = 0x1200
DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH2, r3
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
- mfcr r3
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+ mtspr SPRN_SPRG_SCRATCH2, r12
+ mfcr r12
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -499,59 +462,49 @@ _ENTRY(DTLBMiss_jmp)
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
- mtcr r3
+ mtcr r12
- /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
- * It is bit 26-27 of both the Linux PTE and the TWC (at least
+ /* Insert the Guarded flag into the TWC from the Linux PTE.
+ * It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, 26, 27
- /* Insert the WriteThru flag into the TWC from the Linux PTE.
- * It is bit 25 in the Linux PTE and bit 30 in the TWC
- */
- rlwimi r11, r10, 32-5, 30, 30
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-
- /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29)
- * In 16k pages mode, SPS is always 1 */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MD_SPS16K
-#endif
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
+ rlwimi r11, r10, 0, _PAGE_GUARDED
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
+ * on that bit will represent a Non Access group
+ */
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
+ mtspr SPRN_MD_TWC, r11
+
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
li r11, RPN_PATTERN
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
-#else
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
-#endif
- rlwimi r10, r11, 0, 20, 20 /* clear 20 */
- MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
/* Restore registers */
- mfspr r3, SPRN_SPRG_SCRATCH2
mtspr SPRN_DAR, r11 /* Tag DAR */
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(dtlb_miss_perf)
+ lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -566,7 +519,6 @@ _ENTRY(DTLBMiss_jmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -575,7 +527,6 @@ _ENTRY(DTLBMiss_jmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
@@ -601,7 +552,8 @@ itlbie:
*/
. = 0x1400
DataTLBError:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_DAR
@@ -636,7 +588,8 @@ dtlbie:
*/
. = 0x1c00
DataBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
@@ -652,13 +605,15 @@ DataBreakpoint:
EXC_XFER_EE(0x1c00, do_break)
11:
mtcr r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
. = 0x1d00
InstructionBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
lis r10, (instruction_counter - PAGE_OFFSET)@ha
lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, -1
@@ -666,7 +621,8 @@ InstructionBreakpoint:
lis r10, 0xffff
ori r10, r10, 0x01
mtspr SPRN_COUNTA, r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
#else
EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
@@ -681,51 +637,57 @@ InstructionBreakpoint:
* not enough space in the DataStoreTLBMiss area.
*/
DTLBMissIMMR:
- mtcr r3
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+ mtcr r12
+ /* Set 512k byte guarded page and mark it valid and accessed */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT | _PAGE_NO_CACHE
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
DTLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid and accessed */
+ li r11, MD_PS8MEG | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_3)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid,accessed */
+ li r11, MI_PS8MEG | MI_SVALID | M_APG2
+ mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(itlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#endif
@@ -933,13 +895,6 @@ start_here:
*/
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
-#ifdef CONFIG_8xx_CPU6
- lis r4, cpu6_errata_word@h
- ori r4, r4, cpu6_errata_word@l
- li r3, 0x3f80
- stw r3, 12(r4)
- lwz r3, 12(r4)
-#endif
mtspr SPRN_M_TW, r6
lis r4,2f@h
ori r4,r4,2f@l
@@ -1004,8 +959,8 @@ initial_mmu:
lis r8, KERNELBASE@h /* Create vaddr for TLB */
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */
- ori r8, r8, MI_SVALID /* Make it valid */
+ li r8, MI_PS8MEG /* Set 8M byte page */
+ ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */
mtspr SPRN_MI_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
@@ -1032,7 +987,7 @@ initial_mmu:
ori r8, r8, MD_EVALID /* Mark it valid */
mtspr SPRN_MD_EPN, r8
li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
+ ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */
mtspr SPRN_MD_TWC, r8
mr r8, r9 /* Create paddr for TLB */
ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
@@ -1061,7 +1016,7 @@ initial_mmu:
#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc
#else
rlwinm r8, r8, 0, ~0x8
@@ -1094,13 +1049,7 @@ swapper_pg_dir:
abatron_pteptrs:
.space 8
-#ifdef CONFIG_8xx_CPU6
- .globl cpu6_errata_word
-cpu6_errata_word:
- .space 16
-#endif
-
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter
itlb_miss_counter:
.space 4
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 48c21acef915..2b269315d377 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
/* 64-bit version only for now */
#ifdef CONFIG_PPC64
@@ -46,8 +47,8 @@ _GLOBAL(\name)
bl trace_hardirqs_on
addi r1,r1,128
#endif
- li r0,1
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13)
/* Interrupts will make use return to LR, so get something we want
* in there
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index f57a19348bdd..08faa93755f9 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/irqflags.h>
+#include <asm/hw_irq.h>
#undef DEBUG
@@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
mfmsr r7
#endif /* CONFIG_TRACE_IRQFLAGS */
- li r0,1
- stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
BEGIN_FTR_SECTION
DSSALL
sync
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index b7a84522e652..f88038847790 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -67,6 +67,7 @@
#include <asm/smp.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
+#include <asm/hw_irq.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void)
return happened;
}
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
- __asm__ __volatile__("stb %0,%1(13)"
- : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
static inline notrace int decrementer_check_overflow(void)
{
u64 now = get_tb_or_rtc();
@@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void)
return 0x900;
}
+ if (happened & PACA_IRQ_PMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ return 0xf00;
+ }
+
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
@@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void)
return 0;
}
-notrace void arch_local_irq_restore(unsigned long en)
+notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
unsigned int replay;
/* Write the new soft-enabled value */
- set_soft_enabled(en);
- if (!en)
+ irq_soft_mask_set(mask);
+ if (mask)
return;
+
/*
* From this point onward, we can take interrupts, preempt,
* etc... unless we got hard-disabled. We check if an event
@@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
else {
/*
* We should already be hard disabled here. We had bugs
@@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en)
if (WARN_ON(mfmsr() & MSR_EE))
__hard_irq_disable();
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif
- set_soft_enabled(0);
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
/*
@@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en)
/* We can soft-enable now */
trace_hardirqs_on();
- set_soft_enabled(1);
+ irq_soft_mask_set(IRQS_ENABLED);
/*
* And replay if we have to. This will return with interrupts
@@ -363,7 +364,7 @@ bool prep_irq_for_idle(void)
* of entering the low power state.
*/
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
/* Tell the caller to enter the low power state */
return true;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 71e8a1b8c86e..efdd16a79075 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled;
}
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
- if (pvr_version_is(PVR_POWER9)) {
- unsigned long hmer = mfspr(SPRN_HMER);
-
- /* Do we have the debug bit set */
- if (hmer & PPC_BIT(17)) {
- hmer &= ~PPC_BIT(17);
- mtspr(SPRN_HMER, hmer);
-
- /*
- * Now to avoid problems with soft-disable we
- * only do the emulation if we are coming from
- * user space
- */
- if (user_mode(regs))
- local_paca->hmi_p9_special_emu = 1;
-
- /*
- * Don't bother going to OPAL if that's the
- * only relevant bit.
- */
- if (!(hmer & mfspr(SPRN_HMEER)))
- return local_paca->hmi_p9_special_emu;
+ int pvr;
+ struct device_node *cpun;
+ struct property *prop = NULL;
+ const char *str;
+
+ /* First look in the device tree */
+ preempt_disable();
+ cpun = of_get_cpu_node(smp_processor_id(), NULL);
+ if (cpun) {
+ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+ prop, str) {
+ if (strcmp(str, "bit17-vector-ci-load") == 0)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
+ of_node_put(cpun);
+ }
+ preempt_enable();
+
+ /* If we found the property, don't look at PVR */
+ if (prop)
+ goto out;
+
+ pvr = mfspr(SPRN_PVR);
+ /* Check for POWER9 Nimbus (scale-out) */
+ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+ /* DD2.2 and later */
+ if ((pvr & 0xfff) >= 0x202)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ /* DD2.0 and DD2.1 - used for vector CI load emulation */
+ else if ((pvr & 0xfff) >= 0x200)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ }
+
+ out:
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ pr_debug("HMI debug trigger used for vector CI load\n");
+ break;
+ case DTRIG_SUSPEND_ESCAPE:
+ pr_debug("HMI debug trigger used for TM suspend escape\n");
+ break;
+ default:
+ break;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ * 0 means no further handling is required
+ * 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+ unsigned long hmer = mfspr(SPRN_HMER);
+ long ret = 0;
+
+ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+ if (!((hmer & HMER_DEBUG_TRIG)
+ && hmer_debug_trig_function != DTRIG_UNKNOWN))
+ return -1;
+
+ hmer &= ~HMER_DEBUG_TRIG;
+ /* HMER is a write-AND register */
+ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * host user space
+ */
+ if (regs && user_mode(regs))
+ ret = local_paca->hmi_p9_special_emu = 1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * See if any other HMI causes remain to be handled
+ */
+ if (hmer & mfspr(SPRN_HMEER))
+ return -1;
+
+ return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ int ret;
+
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
wait_for_subcore_guest_exit();
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 644f7040b91c..fe6fc63251fe 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
return pte_pfn(*ptep);
}
-static void flush_tlb_206(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < num_sets; i++) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
- asm volatile("ptesync" : : : "memory");
-}
-
-static void flush_tlb_300(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
- unsigned int r;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
-
- if (early_radix_enabled())
- r = 1;
- else
- r = 0;
-
- /*
- * First flush table/PWC caches with set 0, then flush the
- * rest of the sets, partition scope. Radix must then do it
- * all again with process scope. Hash just has to flush
- * process table.
- */
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
- }
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
- if (early_radix_enabled()) {
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
- }
- }
-
- asm volatile("ptesync" : : : "memory");
-}
-
-/*
- * Generic routines to flush TLB on POWER processors. These routines
- * are used as flush_tlb hook in the cpu_spec.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
- * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
-void __flush_tlb_power7(unsigned int action)
-{
- flush_tlb_206(POWER7_TLB_SETS, action);
-}
-
-void __flush_tlb_power8(unsigned int action)
-{
- flush_tlb_206(POWER8_TLB_SETS, action);
-}
-
-void __flush_tlb_power9(unsigned int action)
-{
- unsigned int num_sets;
-
- if (early_radix_enabled())
- num_sets = POWER9_TLB_SETS_RADIX;
- else
- num_sets = POWER9_TLB_SETS_HASH;
-
- flush_tlb_300(num_sets, action);
-}
-
-
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
@@ -226,10 +117,8 @@ static int mce_flush(int what)
return 1;
}
if (what == MCE_FLUSH_TLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- return 1;
- }
+ tlbiel_all();
+ return 1;
}
return 0;
diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds
new file mode 100644
index 000000000000..cea5dc124be4
--- /dev/null
+++ b/arch/powerpc/kernel/module.lds
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section. */
+SECTIONS
+{
+ .toc 0 : ALIGN(256)
+ {
+ *(.got .toc)
+ }
+}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 218971ac7e04..a2636c250b7b 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -348,8 +348,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
char *p;
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
- else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
@@ -387,12 +390,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return 0;
}
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
- gives the value maximum span in an instruction which uses a signed
- offset) */
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ */
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
- return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+ return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
}
/* Both low and high 16 bits are added as SIGNED additions, so if low
@@ -501,12 +507,22 @@ static bool is_early_mcount_callsite(u32 *instruction)
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
- if (is_early_mcount_callsite(instruction - 1))
+ u32 *prev_insn = instruction - 1;
+
+ if (is_early_mcount_callsite(prev_insn))
+ return 1;
+
+ /*
+ * Make sure the branch isn't a sibling call. Sibling calls aren't
+ * "link" branches and they don't return, so they don't need the r2
+ * restore afterwards.
+ */
+ if (!instr_is_relative_link_branch(*prev_insn))
return 1;
if (*instruction != PPC_INST_NOP) {
- pr_err("%s: Expect noop after relocate, got %08x\n",
- me->name, *instruction);
+ pr_err("%s: Expected nop after call, got %08x at %pS\n",
+ me->name, *instruction, instruction);
return 0;
}
/* ld r2,R2_STACK_OFFSET(r1) */
@@ -628,7 +644,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_PPC_REL24:
/* FIXME: Handle weak symbols here --RR */
- if (sym->st_shndx == SHN_UNDEF) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_LIVEPATCH) {
/* External: go via stub */
value = stub_for_addr(sechdrs, value, me);
if (!value)
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 52fc864cdec4..98a3aeeb3c8c 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -58,7 +58,7 @@ optprobe_template_entry:
std r5,_XER(r1)
mfcr r5
std r5,_CCR(r1)
- lbz r5,PACASOFTIRQEN(r13)
+ lbz r5,PACAIRQSOFTMASK(r13)
std r5,SOFTE(r1)
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6597038931d..95ffedf14885 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -18,6 +18,8 @@
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include "setup.h"
+
#ifdef CONFIG_PPC_BOOK3S
/*
@@ -208,15 +210,14 @@ void __init allocate_pacas(void)
u64 limit;
int cpu;
- limit = ppc64_rma_size;
-
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * We can't take SLB misses on the paca, and we want to access them
- * in real mode, so allocate them within the RMA and also within
- * the first segment.
+ * We access pacas in real mode, and cannot take SLB faults
+ * on them when in virtual mode, so allocate them accordingly.
*/
- limit = min(0x10000000ULL, limit);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+ limit = ppc64_rma_size;
#endif
paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 590f4d0a6cb1..208e623b2557 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
return pci_iov_resource_size(pdev, resno);
}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ if (ppc_md.pcibios_sriov_enable)
+ return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+ return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_sriov_disable)
+ return ppc_md.pcibios_sriov_disable(pdev);
+
+ return 0;
+}
+
#endif /* CONFIG_PCI_IOV */
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(pdev);
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -1276,8 +1299,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
i + PCI_BRIDGE_RESOURCES) == 0)
continue;
}
- pr_warning("PCI: Cannot allocate resource region "
- "%d of PCI bridge %d, will remap\n", i, bus->number);
+ pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+ i, bus->number);
clear_resource:
/* The resource might be figured out when doing
* reassignment based on the resources required
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0e395afbf0f4..ab147a1909c8 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->parent = parent;
pdn->busno = busno;
pdn->devfn = devfn;
-#ifdef CONFIG_PPC_POWERNV
pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
-#endif
INIT_LIST_HEAD(&pdn->child_list);
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
@@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
*/
if (pdev->is_virtfn) {
pdn = pci_get_pdn(pdev);
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
return;
}
@@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
return NULL;
dn->data = pdn;
pdn->phb = hose;
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
u32 addr = of_read_number(regs, 1);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 0d790f8432d2..20ceec4a5f5e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
*/
-static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
unsigned int flags = 0;
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 56548bf6231f..9bfbd800d32f 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
&page_map_fops, vdso_data);
if (!pde)
return 1;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4208cbe2fb7f..1738c4127b32 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -42,6 +42,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
+#include <linux/pkeys.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -57,6 +58,7 @@
#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
+#include <asm/hw_irq.h>
#endif
#include <asm/code-patching.h>
#include <asm/exec.h>
@@ -1097,6 +1099,8 @@ static inline void save_sprs(struct thread_struct *t)
t->tar = mfspr(SPRN_TAR);
}
#endif
+
+ thread_pkey_regs_save(t);
}
static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1136,6 +1140,8 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
#endif
+
+ thread_pkey_regs_restore(new_thread, old_thread);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1404,7 +1410,7 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
@@ -1504,14 +1510,15 @@ static int assign_thread_tidr(void)
{
int index;
int err;
+ unsigned long flags;
again:
if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
return -ENOMEM;
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
err = ida_get_new_above(&vas_thread_ida, 1, &index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
if (err == -EAGAIN)
goto again;
@@ -1519,9 +1526,9 @@ again:
return err;
if (index > MAX_THREAD_CONTEXT) {
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
return -ENOMEM;
}
@@ -1530,9 +1537,11 @@ again:
static void free_thread_tidr(int id)
{
- spin_lock(&vas_thread_id_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, id);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
}
/*
@@ -1584,6 +1593,7 @@ int set_thread_tidr(struct task_struct *t)
return 0;
}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
#endif /* CONFIG_PPC64 */
@@ -1669,7 +1679,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs->gpr[14] = ppc_function_entry((void *)usp);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = 1;
+ childregs->softe = IRQS_ENABLED;
#endif
childregs->gpr[15] = kthread_arg;
p->thread.regs = NULL; /* no user register state */
@@ -1860,6 +1870,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ thread_pkey_regs_init(&current->thread);
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b15bae265c90..4dffef947b8a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -59,6 +59,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
#include <mm/mmu_decl.h>
@@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
#ifdef CONFIG_PPC_PSERIES
/*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
* This contains a list of memory blocks along with NUMA affinity
* information.
*/
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm)
{
- const __be32 *dm, *ls, *usm;
- int l;
- unsigned long n, flags;
- u64 base, size, memblock_size;
- unsigned int is_kexec_kdump = 0, rngs;
-
- ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
- if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
- return 0;
- memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+ u64 base, size;
+ int is_kexec_kdump = 0, rngs;
- dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
- if (dm == NULL || l < sizeof(__be32))
- return 0;
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ rngs = 1;
- n = of_read_number(dm++, 1); /* number of entries */
- if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
- return 0;
+ /*
+ * Skip this block if the reserved bit is set in flags
+ * or if the block is not assigned to this partition.
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
+ return;
- /* check if this is a kexec/kdump kernel. */
- usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
- &l);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- base = dt_mem_next_cell(dt_root_addr_cells, &dm);
- flags = of_read_number(&dm[3], 1);
- /* skip DRC index, pad, assoc. list index, flags */
- dm += 4;
- /* skip this block if the reserved bit is set in flags
- or if the block is not assigned to this partition */
- if ((flags & DRCONF_MEM_RESERVED) ||
- !(flags & DRCONF_MEM_ASSIGNED))
- continue;
- size = memblock_size;
- rngs = 1;
+ if (is_kexec_kdump) {
+ /*
+ * For each memblock in ibm,dynamic-memory, a
+ * corresponding entry in linux,drconf-usable-memory
+ * property contains a counter 'p' followed by 'p'
+ * (base, size) duple. Now read the counter from
+ * linux,drconf-usable-memory property
+ */
+ rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+ if (!rngs) /* there are no (base, size) duple */
+ return;
+ }
+
+ do {
if (is_kexec_kdump) {
- /*
- * For each memblock in ibm,dynamic-memory, a corresponding
- * entry in linux,drconf-usable-memory property contains
- * a counter 'p' followed by 'p' (base, size) duple.
- * Now read the counter from
- * linux,drconf-usable-memory property
- */
- rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
- if (!rngs) /* there are no (base, size) duple */
+ base = dt_mem_next_cell(dt_root_addr_cells, usm);
+ size = dt_mem_next_cell(dt_root_size_cells, usm);
+ }
+
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
continue;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
}
- do {
- if (is_kexec_kdump) {
- base = dt_mem_next_cell(dt_root_addr_cells,
- &usm);
- size = dt_mem_next_cell(dt_root_size_cells,
- &usm);
- }
- if (iommu_is_off) {
- if (base >= 0x80000000ul)
- continue;
- if ((base + size) > 0x80000000ul)
- size = 0x80000000ul - base;
- }
- memblock_add(base, size);
- } while (--rngs);
- }
- memblock_dump_all();
- return 0;
+
+ DBG("Adding: %llx -> %llx\n", base, size);
+ memblock_add(base, size);
+ } while (--rngs);
}
-#else
-#define early_init_dt_scan_drconf_memory(node) 0
#endif /* CONFIG_PPC_PSERIES */
static int __init early_init_dt_scan_memory_ppc(unsigned long node,
const char *uname,
int depth, void *data)
{
+#ifdef CONFIG_PPC_PSERIES
if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
- return early_init_dt_scan_drconf_memory(node);
+ strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+ walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+ return 0;
+ }
+#endif
return early_init_dt_scan_memory(node, uname, depth, data);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..adf044daafd7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -869,10 +869,12 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
+ .byte22 = OV5_FEAT(OV5_DRC_INFO),
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f52ad5bb7109..ca72d7391d40 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -35,6 +35,7 @@
#include <linux/context_tracking.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
@@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
if (regno == PT_DSCR)
return get_user_dscr(task, data);
+#ifdef CONFIG_PPC64
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+#endif
+
if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
@@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target,
return ret;
}
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.amr, 0,
+ ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* These are our native regset flavors.
*/
@@ -1809,6 +1877,9 @@ enum powerpc_regset {
REGSET_EBB, /* EBB registers */
REGSET_PMR, /* Performance Monitor Registers */
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
};
static const struct user_regset native_regsets[] = {
@@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = {
.active = pmu_active, .get = pmu_get, .set = pmu_set
},
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
};
static const struct user_regset_view user_ppc_native_view = {
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index c8c5f3a550c2..fb070d8cad07 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -261,19 +261,19 @@ static int __init proc_rtas_init(void)
if (rtas_node == NULL)
return -ENODEV;
- proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/progress", 0644, NULL,
&ppc_rtas_progress_operations);
- proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/clock", 0644, NULL,
&ppc_rtas_clock_operations);
- proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/poweron", 0644, NULL,
&ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
+ proc_create("powerpc/rtas/sensors", 0444, NULL,
&ppc_rtas_sensors_operations);
- proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/frequency", 0644, NULL,
&ppc_rtas_tone_freq_operations);
- proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/volume", 0644, NULL,
&ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
+ proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
&ppc_rtas_rmo_buf_ops);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index f6f6a8a5103a..10fabae2574d 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -727,7 +727,7 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->fops))
goto enomem;
/*
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 1da8b7d8c6ca..fc600a8b1e77 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -581,7 +581,7 @@ static int __init rtas_init(void)
if (!rtas_log_buf)
return -ENODEV;
- entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
+ entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
&proc_rtas_log_operations);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 3f33869c6486..d73ec518ef80 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -346,10 +346,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy / (500000/HZ),
(loops_per_jiffy / (5000/HZ)) % 100);
#endif
-
-#ifdef CONFIG_SMP
seq_printf(m, "\n");
-#endif
+
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
@@ -379,10 +377,10 @@ static void c_stop(struct seq_file *m, void *v)
}
const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
};
void __init check_for_initrd(void)
@@ -459,13 +457,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
*/
void __init smp_setup_cpu_maps(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
int cpu = 0;
int nthreads = 1;
DBG("smp_setup_cpu_maps()\n");
- while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+ for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
int j, len;
@@ -505,6 +503,11 @@ void __init smp_setup_cpu_maps(void)
set_cpu_possible(cpu, true);
cpu++;
}
+
+ if (cpu >= nr_cpu_ids) {
+ of_node_put(dn);
+ break;
+ }
}
/* If no SMT supported, nthreads is forced to 1 */
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 21c18071d9d5..3fc11e30308f 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -51,6 +51,10 @@ void record_spr_defaults(void);
static inline void record_spr_defaults(void) { };
#endif
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+#endif
+
/*
* Having this in kvm_ppc.h makes include dependencies too
* tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e67413f4a8f0..c388cc3357fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,8 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#define DEBUG
-
#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
@@ -69,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
#include "setup.h"
@@ -190,6 +189,8 @@ static void __init fixup_boot_paca(void)
get_paca()->cpu_start = 1;
/* Allow percpu accesses to work until we setup percpu data */
get_paca()->data_offset = 0;
+ /* Mark interrupts disabled in PACA */
+ irq_soft_mask_set(IRQS_DISABLED);
}
static void __init configure_exceptions(void)
@@ -352,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr)
void early_setup_secondary(void)
{
/* Mark interrupts disabled in PACA */
- get_paca()->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
@@ -568,25 +569,31 @@ void __init initialize_cache_info(void)
DBG(" <- initialize_cache_info()\n");
}
-/* This returns the limit below which memory accesses to the linear
- * mapping are guarnateed not to cause a TLB or SLB miss. This is
- * used to allocate interrupt or emergency stacks for which our
- * exception entry path doesn't deal with being interrupted.
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * that are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
*/
-static __init u64 safe_stack_limit(void)
+__init u64 ppc64_bolted_size(void)
{
#ifdef CONFIG_PPC_BOOK3E
/* Freescale BookE bolts the entire linear mapping */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
+ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
return linear_map_top;
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ /* BookS radix, does not take faults on linear mapping */
if (early_radix_enabled())
return ULONG_MAX;
- /* BookS, the first segment is bolted */
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ /* BookS hash, the first segment is bolted */
+ if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
return 1UL << SID_SHIFT;
#endif
@@ -594,7 +601,7 @@ static __init u64 safe_stack_limit(void)
void __init irqstack_early_init(void)
{
- u64 limit = safe_stack_limit();
+ u64 limit = ppc64_bolted_size();
unsigned int i;
/*
@@ -679,7 +686,7 @@ void __init emergency_stack_init(void)
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
@@ -857,7 +864,7 @@ static void init_fallback_flush(void)
int cpu;
l1d_size = ppc64_caches.l1d.size;
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
/*
* Align to L1d size, and size it at 2x L1d size, to catch possible
@@ -868,19 +875,8 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- /*
- * The fallback flush is currently coded for 8-way
- * associativity. Different associativity is possible, but it
- * will be treated as 8-way and may not evict the lines as
- * effectively.
- *
- * 128 byte lines are mandatory.
- */
- u64 c = l1d_size / 8;
-
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_congruence = c;
- paca[cpu].l1d_flush_sets = c / 128;
+ paca[cpu].l1d_flush_size = l1d_size;
}
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index aded81169648..a46de0035214 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs,
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ elf_greg_t64 softe = 0x1;
WARN_ON(!FULL_REGS(regs));
for (i = 0; i <= PT_RESULT; i ++) {
if (i == 14 && !FULL_REGS(regs))
i = 32;
+ if ( i == PT_SOFTE) {
+ if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
+ return -EFAULT;
+ else
+ continue;
+ }
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4b9ca3570344..720117690822 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
long err = 0;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ unsigned long softe = 0x1;
BUG_ON(tsk != current);
@@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc,
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+ err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
err |= __put_user(signr, &sc->signal);
err |= __put_user(handler, &sc->handler);
if (set != NULL)
@@ -207,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.ckpt_regs.msr;
+ unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
@@ -216,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
WARN_ON(tm_suspend_disabled);
+ /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+ * it contains the correct FP, VEC, VSX state after we treclaimed
+ * the transaction and giveup_all() was called on reclaiming.
+ */
+ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e0a4c1f82e25..bbe7634b3a43 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
+ int cpu;
+
smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
+ if (kdump_in_progress() && crash_wake_offline) {
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ continue;
+ /*
+ * crash_ipi_callback will wait for
+ * all cpus, including offline CPUs.
+ * We don't care about nmi_ipi_function.
+ * Offline cpus will jump straight into
+ * crash_ipi_callback, we can skip the
+ * entire NMI dance and waiting for
+ * cpus to clear pending mask, etc.
+ */
+ do_smp_send_nmi_ipi(cpu);
+ }
+ }
}
#endif
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b8d4a1dac39f..5a8bfee6e187 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
/*
Lets only enable read for phyp resources and
@@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
/*
* This is the system wide DSCR register default value. Any
@@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fe6f3a285455..a32823dcd9a4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
void accumulate_stolen_time(void)
{
u64 sst, ust;
- u8 save_soft_enabled = local_paca->soft_enabled;
+ unsigned long save_irq_soft_mask = irq_soft_mask_return();
struct cpu_accounting_data *acct = &local_paca->accounting;
/* We are called early in the exception entry, before
@@ -253,7 +253,7 @@ void accumulate_stolen_time(void)
* needs to reflect that so various debug stuff doesn't
* complain
*/
- local_paca->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
sst = scan_dispatch_log(acct->starttime_user);
ust = scan_dispatch_log(acct->starttime);
@@ -261,7 +261,7 @@ void accumulate_stolen_time(void)
acct->utime -= ust;
acct->steal_time += ust + sst;
- local_paca->soft_enabled = save_soft_enabled;
+ irq_soft_mask_set(save_irq_soft_mask);
}
static inline u64 calculate_stolen_time(u64 stop_tb)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c93f1e6a9fff..1e48d157196a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -20,6 +20,7 @@
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
@@ -38,6 +39,8 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -142,6 +145,28 @@ static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
+extern void panic_flush_kmsg_start(void)
+{
+ /*
+ * These are mostly taken from kernel/panic.c, but tries to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+}
+
+extern void panic_flush_kmsg_end(void)
+{
+ printk_safe_flush_on_panic();
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic();
+}
+
static unsigned long oops_begin(struct pt_regs *regs)
{
int cpu;
@@ -266,7 +291,9 @@ void user_single_step_siginfo(struct task_struct *tsk,
info->si_addr = (void __user *)regs->nip;
}
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+
+void _exception_pkey(int signr, struct pt_regs *regs, int code,
+ unsigned long addr, int key)
{
siginfo_t info;
const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
@@ -289,13 +316,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
local_irq_enable();
current->thread.trap_nr = code;
+
+ /*
+ * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
+ * to capture the content, if the task gets killed.
+ */
+ thread_pkey_regs_save(&current->thread);
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
+ info.si_pkey = key;
+
force_sig_info(signr, &info, current);
}
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ _exception_pkey(signr, regs, code, addr, 0);
+}
+
void system_reset_exception(struct pt_regs *regs)
{
/*
@@ -337,7 +378,7 @@ void system_reset_exception(struct pt_regs *regs)
* No debugger or crash dump registered, print logs then
* panic.
*/
- __die("System Reset", regs, SIGABRT);
+ die("System Reset", regs, SIGABRT);
mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -1564,7 +1605,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
u8 status;
bool hv;
- hv = (regs->trap == 0xf80);
+ hv = (TRAP(regs) == 0xf80);
if (hv)
value = mfspr(SPRN_HFSCR);
else
@@ -2113,13 +2154,13 @@ static int __init ppc_warn_emulated_init(void)
if (!dir)
return -ENOMEM;
- d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32("do_warn", 0644, dir,
&ppc_warn_emulated);
if (!d)
goto fail;
for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32(entries[i].name, 0644, dir,
(u32 *)&entries[i].val.counter);
if (!d)
goto fail;
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 382021324883..c002adcc694c 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+
+ cmpwi cr5,r3,CLOCK_REALTIME_COARSE
+ cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
bne cr0,99f
mflr r12 /* r12 saves lr */
@@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
+ beq cr5,70f
50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
@@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r8 /* check if updated */
bne- 50b
+ b 78f
- /* Add wall->monotonic offset and check for overflow or underflow.
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
*/
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
+70: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 70b
+ add r3,r3,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ bne cr6,75f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)
+
+ /* check if counter has updated */
+ or r0,r6,r9
+75: or r0,r0,r4
+ or r0,r0,r5
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 70b
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6,80f
+
+ /* Add wall->monotonic offset and check for overflow or underflow */
+78: add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
+ blt 79f
+ subf r5,r7,r5
+ addi r4,r4,1
+79: bge cr1,80f
+ addi r4,r4,-1
+ add r5,r5,r7
80: std r4,TSPC64_TV_SEC(r11)
std r5,TSPC64_TV_NSEC(r11)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 74901a87bf7a..c8af90ff49f0 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -273,6 +273,7 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
+ *(.data.rel*)
*(.sdata)
*(.sdata2)
*(.got.plt) *(.got)
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 87da80ccced1..6256dc3b0087 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -6,6 +6,9 @@
*
* This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
*/
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
@@ -26,15 +29,45 @@
#include <asm/paca.h>
/*
- * The watchdog has a simple timer that runs on each CPU, once per timer
- * period. This is the heartbeat.
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having a SMP watchdog where all CPUs check all other
+ * CPUs heartbeat.
*
- * Then there are checks to see if the heartbeat has not triggered on a CPU
- * for the panic timeout period. Currently the watchdog only supports an
- * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears their pending bit in their heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
*
- * This is not an NMI watchdog, but Linux uses that name for a generic
- * watchdog in some cases, so NMI gets used in some places.
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can by used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect hardware interrupts off lockups.
*/
static cpumask_t wd_cpus_enabled __read_mostly;
@@ -47,19 +80,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
static DEFINE_PER_CPU(struct timer_list, wd_timer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
-/*
- * These are for the SMP checker. CPUs clear their pending bit in their
- * heartbeat. If the bitmask becomes empty, the time is noted and the
- * bitmask is refilled.
- *
- * All CPUs clear their bit in the pending mask every timer period.
- * Once all have cleared, the time is noted and the bits are reset.
- * If the time since all clear was greater than the panic timeout,
- * we can panic with the list of stuck CPUs.
- *
- * This will work best with NMI IPIs for crash code so the stuck CPUs
- * can be pulled out to get their backtraces.
- */
+/* SMP checker bits */
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
@@ -90,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags)
static void wd_lockup_ipi(struct pt_regs *regs)
{
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+ pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id());
print_modules();
print_irqtrace_events(current);
if (regs)
@@ -131,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (cpumask_weight(&wd_smp_cpus_pending) == 0)
goto out;
- pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_pending));
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
@@ -175,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
unsigned long flags;
- pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+ pr_emerg("CPU %d became unstuck\n", cpu);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
@@ -233,13 +254,10 @@ void soft_nmi_interrupt(struct pt_regs *regs)
}
set_cpu_stuck(cpu, tb);
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+ pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip);
print_modules();
print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
+ show_regs(regs);
wd_smp_unlock(&flags);
@@ -388,30 +406,8 @@ int __init watchdog_nmi_probe(void)
"powerpc/watchdog:online",
start_wd_on_cpu, stop_wd_on_cpu);
if (err < 0) {
- pr_warn("Watchdog could not be initialized");
+ pr_warn("could not be initialized");
return err;
}
return 0;
}
-
-static void handle_backtrace_ipi(struct pt_regs *regs)
-{
- nmi_cpu_backtrace(regs);
-}
-
-static void raise_backtrace_ipi(cpumask_t *mask)
-{
- unsigned int cpu;
-
- for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
- handle_backtrace_ipi(NULL);
- else
- smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
- }
-}
-
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
-{
- nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
-}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d46037ce936..e4f70c33fbc7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -93,10 +93,10 @@
static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
static int dynamic_mt_modes = 6;
-module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+module_param(dynamic_mt_modes, int, 0644);
MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
static int target_smt_mode;
-module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
static bool indep_threads_mode = true;
@@ -109,12 +109,10 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
};
-module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
- S_IRUGO | S_IWUSR);
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
-module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
- S_IRUGO | S_IWUSR);
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..b11043b23c18 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -87,8 +87,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -105,8 +104,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
break;
default:
handled = 0;
@@ -268,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
* secondary threads to proceed.
* - All secondary threads will eventually call opal hmi handler on
* their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
*/
long kvmppc_realmode_hmi_handler(void)
{
- int ptid = local_paca->kvm_hstate.ptid;
bool resync_req;
- /* This is only called on primary thread. */
- BUG_ON(ptid != 0);
__this_cpu_inc(irq_stat.hmi_exceptions);
+ if (hmi_handle_debugtrig(NULL) >= 0)
+ return 1;
+
/*
* By now primary thread has already completed guest->host
* partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 26c11f678fbf..8888e625a999 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-static int global_invalidates(struct kvm *kvm, unsigned long flags)
+static int global_invalidates(struct kvm *kvm)
{
int global;
int cpu;
@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte_r, pte_index);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/*
* The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until
@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
if (kvm_is_radix(kvm))
return H_FUNCTION;
- global = global_invalidates(kvm, 0);
+ global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
- true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/* Don't lose R/C bit updates done by hardware */
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9c61f736c75b..7886b313d135 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1908,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
bne 27f
bl kvmppc_realmode_hmi_handler
nop
+ cmpdi r3, 0
li r12, BOOK3S_INTERRUPT_HMI
/*
- * At this point kvmppc_realmode_hmi_handler would have resync-ed
- * the TB. Hence it is not required to subtract guest timebase
- * offset from timebase. So, skip it.
+ * At this point kvmppc_realmode_hmi_handler may have resync-ed
+ * the TB, and if it has, we must not subtract the guest timebase
+ * offset from the timebase. So, skip it.
*
* Also, do not call kvmppc_subcore_exit_guest() because it has
* been invoked as part of kvmppc_realmode_hmi_handler().
*/
- b 30f
+ beq 30f
27:
/* Subtract timebase offset from timebase */
@@ -3248,7 +3249,7 @@ kvmppc_bad_host_intr:
mfctr r4
#endif
mfxer r5
- lbz r6, PACASOFTIRQEN(r13)
+ lbz r6, PACAIRQSOFTMASK(r13)
std r3, _LINK(r1)
std r4, _CTR(r1)
std r5, _XER(r1)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index d329b2add7e2..b8356cdc0c04 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1039,7 +1039,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
return;
}
- xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
xics, &xics_debug_fops);
pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d469224c4ada..e0d881ab304e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -23,19 +23,26 @@
#include <asm/code-patching.h>
#include <asm/setup.h>
-static int __patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
+ unsigned int *patch_addr)
{
int err;
- __put_user_size(instr, addr, 4, err);
+ __put_user_size(instr, patch_addr, 4, err);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+ asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+ "r" (exec_addr));
return 0;
}
+int raw_patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ return __patch_instruction(addr, instr, addr);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr)
int patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
- unsigned int *dest = NULL;
+ unsigned int *patch_addr = NULL;
unsigned long flags;
unsigned long text_poke_addr;
unsigned long kaddr = (unsigned long)addr;
@@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
- return __patch_instruction(addr, instr);
+ if (!this_cpu_read(text_poke_area))
+ return raw_patch_instruction(addr, instr);
local_irq_save(flags);
@@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
goto out;
}
- dest = (unsigned int *)(text_poke_addr) +
+ patch_addr = (unsigned int *)(text_poke_addr) +
((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
- /*
- * We use __put_user_size so that we can handle faults while
- * writing to dest and return err to handle faults gracefully
- */
- __put_user_size(instr, dest, 4, err);
- if (!err)
- asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
- ::"r" (dest), "r"(addr));
+ __patch_instruction(addr, instr, patch_addr);
err = unmap_patch_area(text_poke_addr);
if (err)
@@ -184,7 +184,7 @@ out:
int patch_instruction(unsigned int *addr, unsigned int instr)
{
- return __patch_instruction(addr, instr);
+ return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
@@ -302,6 +302,11 @@ int instr_is_relative_branch(unsigned int instr)
return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}
+int instr_is_relative_link_branch(unsigned int instr)
+{
+ return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK);
+}
+
static unsigned long branch_iform_target(const unsigned int *instr)
{
signed long imm;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index a95ea007d654..73697c4e3468 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
}
}
- patch_instruction(dest, instr);
+ raw_patch_instruction(dest, instr);
return 0;
}
@@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
}
for (; dest < end; dest++)
- patch_instruction(dest, PPC_INST_NOP);
+ raw_patch_instruction(dest, PPC_INST_NOP);
return 0;
}
@@ -170,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
for (; start < end; start++) {
dest = (void *)start + *start;
- patch_instruction(dest, PPC_INST_LWSYNC);
+ raw_patch_instruction(dest, PPC_INST_LWSYNC);
}
}
@@ -188,7 +188,7 @@ static void do_final_fixups(void)
length = (__end_interrupts - _stext) / sizeof(int);
while (length--) {
- patch_instruction(dest, *src);
+ raw_patch_instruction(dest, *src);
src++;
dest++;
}
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f29212e40f40..849f50cd62f2 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
#ifdef CONFIG_PIN_TLB_DATA
unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
- unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
#ifdef CONFIG_PIN_TLB_IMMR
int i = 29;
#else
@@ -79,7 +79,7 @@ void __init MMU_init_hw(void)
for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
mtspr(SPRN_MD_CTR, ctr | (i << 8));
mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
addr += LARGE_PAGE_SIZE_8M;
mem -= LARGE_PAGE_SIZE_8M;
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 76a6b057d454..f06f3577d8d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- init-common.o mmu_context.o
+ init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
@@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
new file mode 100644
index 000000000000..1604110c4238
--- /dev/null
+++ b/arch/powerpc/mm/drmem.c
@@ -0,0 +1,439 @@
+/*
+ * Dynamic reconfiguration memory support
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "drmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <asm/prom.h>
+#include <asm/drmem.h>
+
+static struct drmem_lmb_info __drmem_info;
+struct drmem_lmb_info *drmem_info = &__drmem_info;
+
+u64 drmem_lmb_memory_max(void)
+{
+ struct drmem_lmb *last_lmb;
+
+ last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+ return last_lmb->base_addr + drmem_lmb_size();
+}
+
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+ /*
+ * Return the value of the lmb flags field minus the reserved
+ * bit used internally for hotplug processing.
+ */
+ return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+ new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ kfree(new_prop->name);
+ kfree(new_prop->value);
+ kfree(new_prop);
+ return NULL;
+ }
+
+ new_prop->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+ of_property_set_flag(new_prop, OF_DYNAMIC);
+#endif
+ return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v1 *dr_cell;
+ struct drmem_lmb *lmb;
+ u32 *p;
+
+ new_prop = clone_property(prop, prop->length);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+ dr_cell = (struct of_drconf_cell_v1 *)p;
+
+ for_each_drmem_lmb(lmb) {
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+ dr_cell++;
+ }
+
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ struct drmem_lmb *lmb)
+{
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(lmb->flags);
+}
+
+static int drmem_update_dt_v2(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v2 *dr_cell;
+ struct drmem_lmb *lmb, *prev_lmb;
+ u32 lmb_sets, prop_sz, seq_lmbs;
+ u32 *p;
+
+ /* First pass, determine how many LMB sets are needed. */
+ lmb_sets = 0;
+ prev_lmb = NULL;
+ for_each_drmem_lmb(lmb) {
+ if (!prev_lmb) {
+ prev_lmb = lmb;
+ lmb_sets++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ prev_lmb->flags != lmb->flags)
+ lmb_sets++;
+
+ prev_lmb = lmb;
+ }
+
+ prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+ new_prop = clone_property(prop, prop_sz);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(lmb_sets);
+
+ dr_cell = (struct of_drconf_cell_v2 *)p;
+
+ /* Second pass, populate the LMB set data */
+ prev_lmb = NULL;
+ seq_lmbs = 0;
+ for_each_drmem_lmb(lmb) {
+ if (prev_lmb == NULL) {
+ /* Start of first LMB set */
+ prev_lmb = lmb;
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ prev_lmb->flags != lmb->flags) {
+ /* end of one set, start of another */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ dr_cell++;
+
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs = 1;
+ } else {
+ seq_lmbs++;
+ }
+
+ prev_lmb = lmb;
+ }
+
+ /* close out last LMB set */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+int drmem_update_dt(void)
+{
+ struct device_node *memory;
+ struct property *prop;
+ int rc = -1;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
+ prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ rc = drmem_update_dt_v1(memory, prop);
+ } else {
+ prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ rc = drmem_update_dt_v2(memory, prop);
+ }
+
+ of_node_put(memory);
+ return rc;
+}
+
+static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ lmb->drc_index = of_read_number(p++, 1);
+
+ p++; /* skip reserved field */
+
+ lmb->aa_index = of_read_number(p++, 1);
+ lmb->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ struct drmem_lmb lmb;
+ u32 i, n_lmbs;
+
+ n_lmbs = of_read_number(prop++, 1);
+
+ for (i = 0; i < n_lmbs; i++) {
+ read_drconf_v1_cell(&lmb, &prop);
+ func(&lmb, &usm);
+ }
+}
+
+static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ dr_cell->seq_lmbs = of_read_number(p++, 1);
+ dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ dr_cell->drc_index = of_read_number(p++, 1);
+ dr_cell->aa_index = of_read_number(p++, 1);
+ dr_cell->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ struct of_drconf_cell_v2 dr_cell;
+ struct drmem_lmb lmb;
+ u32 i, j, lmb_sets;
+
+ lmb_sets = of_read_number(prop++, 1);
+
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &prop);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb.base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_lmb_size();
+
+ lmb.drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb.aa_index = dr_cell.aa_index;
+ lmb.flags = dr_cell.flags;
+
+ func(&lmb, &usm);
+ }
+ }
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ const __be32 *prop, *usm;
+ int len;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+ if (!prop || len < dt_root_size_cells * sizeof(__be32))
+ return;
+
+ drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+ usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
+ if (prop) {
+ __walk_drmem_v1_lmbs(prop, usm, func);
+ } else {
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
+ &len);
+ if (prop)
+ __walk_drmem_v2_lmbs(prop, usm, func);
+ }
+
+ memblock_dump_all();
+}
+
+#endif
+
+static int __init init_drmem_lmb_size(struct device_node *dn)
+{
+ const __be32 *prop;
+ int len;
+
+ if (drmem_info->lmb_size)
+ return 0;
+
+ prop = of_get_property(dn, "ibm,lmb-size", &len);
+ if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+ pr_info("Could not determine LMB size\n");
+ return -1;
+ }
+
+ drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+ return 0;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+ const __be32 *prop;
+ u32 len;
+
+ prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
+ if (!prop || len < sizeof(unsigned int))
+ return NULL;
+
+ return prop;
+}
+
+void __init walk_drmem_lmbs(struct device_node *dn,
+ void (*func)(struct drmem_lmb *, const __be32 **))
+{
+ const __be32 *prop, *usm;
+
+ if (init_drmem_lmb_size(dn))
+ return;
+
+ usm = of_get_usable_memory(dn);
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ __walk_drmem_v1_lmbs(prop, usm, func);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ __walk_drmem_v2_lmbs(prop, usm, func);
+ }
+}
+
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+
+ drmem_info->n_lmbs = of_read_number(prop++, 1);
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ for_each_drmem_lmb(lmb)
+ read_drconf_v1_cell(lmb, &prop);
+}
+
+static void __init init_drmem_v2_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+ struct of_drconf_cell_v2 dr_cell;
+ const __be32 *p;
+ u32 i, j, lmb_sets;
+ int lmb_index;
+
+ lmb_sets = of_read_number(prop++, 1);
+
+ /* first pass, calculate the number of LMBs */
+ p = prop;
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+ drmem_info->n_lmbs += dr_cell.seq_lmbs;
+ }
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ /* second pass, read in the LMB information */
+ lmb_index = 0;
+ p = prop;
+
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb = &drmem_info->lmbs[lmb_index++];
+
+ lmb->base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_info->lmb_size;
+
+ lmb->drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb->aa_index = dr_cell.aa_index;
+ lmb->flags = dr_cell.flags;
+ }
+ }
+}
+
+static int __init drmem_init(void)
+{
+ struct device_node *dn;
+ const __be32 *prop;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn) {
+ pr_info("No dynamic reconfiguration memory found\n");
+ return 0;
+ }
+
+ if (init_drmem_lmb_size(dn)) {
+ of_node_put(dn);
+ return 0;
+ }
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ init_drmem_v1_lmbs(prop);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ init_drmem_v2_lmbs(prop);
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+late_initcall(drmem_init);
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c2e7dea59490..876e2a3c79f2 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,26 +112,25 @@ struct flag_info {
static const struct flag_info flag_array[] = {
{
-#ifdef CONFIG_PPC_BOOK3S_64
- .mask = _PAGE_PRIVILEGED,
- .val = 0,
-#else
- .mask = _PAGE_USER,
+ .mask = _PAGE_USER | _PAGE_PRIVILEGED,
.val = _PAGE_USER,
-#endif
.set = "user",
.clear = " ",
}, {
-#if _PAGE_RO == 0
- .mask = _PAGE_RW,
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
.val = _PAGE_RW,
-#else
- .mask = _PAGE_RO,
- .val = 0,
-#endif
.set = "rw",
- .clear = "ro",
}, {
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "ro",
+ }, {
+#if _PAGE_NA != 0
+ .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "na",
+ }, {
+#endif
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,
.set = " X ",
@@ -213,7 +212,7 @@ static const struct flag_info flag_array[] = {
.val = H_PAGE_4K_PFN,
.set = "4K_pfn",
}, {
-#endif
+#else /* CONFIG_PPC_64K_PAGES */
.mask = H_PAGE_F_GIX,
.val = H_PAGE_F_GIX,
.set = "f_gix",
@@ -224,14 +223,11 @@ static const struct flag_info flag_array[] = {
.val = H_PAGE_F_SECOND,
.set = "f_second",
}, {
+#endif /* CONFIG_PPC_64K_PAGES */
#endif
.mask = _PAGE_SPECIAL,
.val = _PAGE_SPECIAL,
.set = "special",
- }, {
- .mask = _PAGE_SHARED,
- .val = _PAGE_SHARED,
- .set = "shared",
}
};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6e1e39035380..866446cf2d9a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs)
*/
static int
-__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
/*
* If we are in kernel mode, bail out with a SEGV, this will
@@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
if (!user_mode(regs))
return SIGSEGV;
- _exception(SIGSEGV, regs, si_code, address);
+ _exception_pkey(SIGSEGV, regs, si_code, address, pkey);
return 0;
}
static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
- return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
}
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
struct mm_struct *mm = current->mm;
@@ -137,17 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
*/
up_read(&mm->mmap_sem);
- return __bad_area_nosemaphore(regs, address, si_code);
+ return __bad_area_nosemaphore(regs, address, si_code, pkey);
}
static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
- return __bad_area(regs, address, SEGV_MAPERR);
+ return __bad_area(regs, address, SEGV_MAPERR, 0);
+}
+
+static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
+ int pkey)
+{
+ return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
}
static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
- return __bad_area(regs, address, SEGV_ACCERR);
+ return __bad_area(regs, address, SEGV_ACCERR, 0);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -432,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (error_code & DSISR_KEYFAULT)
+ return bad_key_fault_exception(regs, address,
+ get_mm_addr_key(mm, address));
+
/*
* We want to do this outside mmap_sem, because reading code around nip
* can result in fault, which will cause a deadlock when called with
@@ -503,6 +515,31 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * if the HPTE is not hashed, hardware will not detect
+ * a key fault. Lets check if we failed because of a
+ * software detected key fault.
+ */
+ if (unlikely(fault & VM_FAULT_SIGSEGV) &&
+ !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ is_exec, 0)) {
+ /*
+ * The PGD-PDT...PMD-PTE tree may not have been fully setup.
+ * Hence we cannot walk the tree to locate the PTE, to locate
+ * the key. Hence let's use vma_pkey() to get the key; instead
+ * of get_mm_addr_key().
+ */
+ int pkey = vma_pkey(vma);
+
+ if (likely(pkey)) {
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
+ }
+ }
+#endif /* CONFIG_PPC_MEM_KEYS */
+
major |= fault & VM_FAULT_MAJOR;
/*
@@ -576,7 +613,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
/* kernel has accessed a bad area */
- switch (regs->trap) {
+ switch (TRAP(regs)) {
case 0x300:
case 0x380:
printk(KERN_ALERT "Unable to handle kernel paging request for "
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 6fa450c12d6d..5a69b51d08a3 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, int subpg_prot)
{
+ real_pte_t rpte;
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
@@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
/*
* There MIGHT be an HPTE for this pte
*/
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
+ unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
+ rpte, 0);
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -118,8 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 1a68cb19b0e3..2253bbc6a599 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -15,34 +15,22 @@
#include <linux/mm.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
+
/*
- * index from 0 - 15
+ * Return true, if the entry has a slot value which
+ * the software considers as invalid.
*/
-bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+static inline bool hpte_soft_invalid(unsigned long hidx)
{
- unsigned long g_idx;
- unsigned long ptev = pte_val(rpte.pte);
-
- g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT;
- index = index >> 2;
- if (g_idx & (0x1 << index))
- return true;
- else
- return false;
+ return ((hidx & 0xfUL) == 0xfUL);
}
+
/*
* index from 0 - 15
*/
-static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
{
- unsigned long g_idx;
-
- if (!(ptev & H_PAGE_COMBO))
- return ptev;
- index = index >> 2;
- g_idx = 0x1 << index;
-
- return ptev | (g_idx << H_PAGE_F_GIX_SHIFT);
+ return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
}
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
@@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
int ssize, int subpg_prot)
{
real_pte_t rpte;
- unsigned long *hidxp;
unsigned long hpte_group;
unsigned int subpg_index;
- unsigned long rflags, pa, hidx;
+ unsigned long rflags, pa;
unsigned long old_pte, new_pte, subpg_pte;
- unsigned long vpn, hash, slot;
+ unsigned long vpn, hash, slot, gslot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
/*
@@ -116,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* On hash insert failure we use old pte value and we don't
* want slot information there if we have a insert failure.
*/
- old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
- new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND);
+ old_pte &= ~H_PAGE_HASHPTE;
+ new_pte &= ~H_PAGE_HASHPTE;
goto htab_insert_hpte;
}
/*
@@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
if (__rpte_sub_valid(rpte, subpg_index)) {
int ret;
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(rpte, subpg_index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
-
- ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
+ subpg_index);
+ ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize, flags);
+
/*
- *if we failed because typically the HPTE wasn't really here
+ * If we failed because typically the HPTE wasn't really here
* we try an insertion.
*/
if (ret == -1)
@@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
}
htab_insert_hpte:
+
+ /*
+ * Initialize all hidx entries to invalid value, the first time
+ * the PTE is about to allocate a 4K HPTE.
+ */
+ if (!(old_pte & H_PAGE_COMBO))
+ rpte.hidx = INVALID_RPTE_HIDX;
+
/*
* handle H_PAGE_4K_PFN case
*/
@@ -172,15 +163,39 @@ repeat:
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
+ bool soft_invalid;
+
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
rflags, HPTE_V_SECONDARY,
MMU_PAGE_4K, MMU_PAGE_4K,
ssize);
- if (slot == -1) {
- if (mftb() & 0x1)
+
+ soft_invalid = hpte_soft_invalid(slot);
+ if (unlikely(soft_invalid)) {
+ /*
+ * We got a valid slot from a hardware point of view.
+ * but we cannot use it, because we use this special
+ * value; as defined by hpte_soft_invalid(), to track
+ * invalid slots. We cannot use it. So invalidate it.
+ */
+ gslot = slot & _PTEIDX_GROUP_IX;
+ mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, 0);
+ }
+
+ if (unlikely(slot == -1 || soft_invalid)) {
+ /*
+ * For soft invalid slot, let's ensure that we release a
+ * slot from the primary, with the hope that we will
+ * acquire that slot next time we try. This will ensure
+ * that we do not get the same soft-invalid slot.
+ */
+ if (soft_invalid || (mftb() & 0x1))
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
+
mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
@@ -198,21 +213,10 @@ repeat:
MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
return -1;
}
- /*
- * Insert slot number & secondary bit in PTE second half,
- * clear H_PAGE_BUSY and set appropriate HPTE slot bit
- * Since we have H_PAGE_BUSY set on ptep, we can be sure
- * nobody is undating hidx.
- */
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
- rpte.hidx &= ~(0xfUL << (subpg_index << 2));
- *hidxp = rpte.hidx | (slot << (subpg_index << 2));
- new_pte = mark_subptegroup_valid(new_pte, subpg_index);
- new_pte |= H_PAGE_HASHPTE;
- /*
- * check __real_pte for details on matching smp_rmb()
- */
- smp_wmb();
+
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= H_PAGE_HASHPTE;
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
@@ -221,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
unsigned long vsid, pte_t *ptep, unsigned long trap,
unsigned long flags, int ssize)
{
+ real_pte_t rpte;
unsigned long hpte_group;
unsigned long rflags, pa;
unsigned long old_pte, new_pte;
@@ -257,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -264,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
vpn = hpt_vpn(ea, vsid, ssize);
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ unsigned long gslot;
+
/*
* There MIGHT be an HPTE for this pte
*/
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
MMU_PAGE_64K, ssize,
flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
@@ -322,9 +325,9 @@ repeat:
MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
return -1;
}
+
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 640cf566e986..a0675e91ad7d 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -47,6 +47,103 @@
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+ unsigned long rb;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+ asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 0; /* hash format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+ : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa206(set, is);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the first set of the TLB, and any caching of partition table
+ * entries. Then flush the remaining sets of the TLB. Hash mode uses
+ * partition scoped TLB translations.
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ /*
+ * Now invalidate the process table cache.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ tlbiel_all_isa206(POWER8_TLB_SETS, is);
+ else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+ tlbiel_all_isa206(POWER7_TLB_SETS, is);
+ else
+ WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+
+ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..7d07c7e17db6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -36,6 +36,7 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
+#include <linux/pkeys.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
*/
rflags |= HPTE_R_M;
+ rflags |= pte_to_hpte_pkey_bits(pteflags);
return rflags;
}
@@ -606,7 +608,7 @@ static void init_hpte_page_sizes(void)
continue; /* not a supported page size */
for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
penc = mmu_psize_defs[bp].penc[ap];
- if (penc == -1)
+ if (penc == -1 || !mmu_psize_defs[ap].shift)
continue;
shift = mmu_psize_defs[ap].shift - LP_SHIFT;
if (shift <= 0)
@@ -772,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
int rc;
rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
- if (rc)
+ if (rc && (rc != -ENODEV))
printk(KERN_WARNING
"Unable to resize hash page table to target order %d: %d\n",
target_hpt_shift, rc);
@@ -979,8 +981,9 @@ void __init hash__early_init_devtree(void)
void __init hash__early_init_mmu(void)
{
+#ifndef CONFIG_PPC_64K_PAGES
/*
- * We have code in __hash_page_64K() and elsewhere, which assumes it can
+ * We have code in __hash_page_4K() and elsewhere, which assumes it can
* do the following:
* new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
*
@@ -991,6 +994,7 @@ void __init hash__early_init_mmu(void)
* with a BUILD_BUG_ON().
*/
BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
+#endif /* CONFIG_PPC_64K_PAGES */
htab_init_page_sizes();
@@ -1049,6 +1053,10 @@ void __init hash__early_init_mmu(void)
pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206)
+ && cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
#ifdef CONFIG_SMP
@@ -1068,6 +1076,10 @@ void hash__early_init_mmu_secondary(void)
}
/* Initialize SLB */
slb_initialize();
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206)
+ && cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
#endif /* CONFIG_SMP */
@@ -1569,6 +1581,30 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_MEM_KEYS
+/*
+ * Return the protection key associated with the given address and the
+ * mm_struct.
+ */
+u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *ptep;
+ u16 pkey = 0;
+ unsigned long flags;
+
+ if (!mm || !mm->pgd)
+ return 0;
+
+ local_irq_save(flags);
+ ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+ if (ptep)
+ pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+ local_irq_restore(flags);
+
+ return pkey;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void tm_flush_hash_page(int local)
{
@@ -1592,29 +1628,42 @@ static inline void tm_flush_hash_page(int local)
}
#endif
+/*
+ * Return the global hash slot, corresponding to the given PTE, which contains
+ * the HPTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+ unsigned long hash, gslot, hidx;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ hidx = __rpte_to_hidx(rpte, subpg_index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ gslot += hidx & _PTEIDX_GROUP_IX;
+ return gslot;
+}
+
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
unsigned long flags)
{
- unsigned long hash, index, shift, hidx, slot;
+ unsigned long index, shift, gslot;
int local = flags & HPTE_LOCAL_UPDATE;
DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(pte, index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+ DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
/*
* We use same base page size and actual psize, because we don't
* use these functions for hugepage
*/
- mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+ mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
ssize, local);
} pte_iterate_hashed_end();
@@ -1825,16 +1874,24 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
- /* On LPAR systems, the first entry is our RMA region,
- * non-LPAR 64-bit hash MMU systems don't have a limitation
- * on real mode access, but using the first entry works well
- * enough. We also clamp it to 1G to avoid some funky things
- * such as RTAS bugs etc...
+ /*
+ * On virtualized systems the first entry is our RMA region aka VRMA,
+ * non-virtualized 64-bit hash MMU systems don't have a limitation
+ * on real mode access.
+ *
+ * For guests on platforms before POWER9, we clamp the it limit to 1G
+ * to avoid some funky things such as RTAS bugs etc...
*/
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ ppc64_rma_size = first_memblock_size;
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
+ ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
- /* Finally limit subsequent allocations */
- memblock_set_current_limit(ppc64_rma_size);
+ /* Finally limit subsequent allocations */
+ memblock_set_current_limit(ppc64_rma_size);
+ } else {
+ ppc64_rma_size = ULONG_MAX;
+ }
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0c2a91df3210..12511f5a015f 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, unsigned int shift, unsigned int mmu_psize)
{
+ real_pte_t rpte;
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
@@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
+ rpte = __real_pte(__pte(old_pte), ptep);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Check if pte already has an hpte (case 2) */
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
- unsigned long hash, slot;
+ unsigned long gslot;
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & H_PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
-
- if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= (slot << H_PAGE_F_GIX_SHIFT) &
- (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a9b9083c5e49..876da2bc1796 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
*hpdp = __hugepd(__pa(new) |
(shift_to_mmu_psize(pshift) << 2));
#elif defined(CONFIG_PPC_8xx)
- *hpdp = __hugepd(__pa(new) |
+ *hpdp = __hugepd(__pa(new) | _PMD_USER |
(pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
_PMD_PAGE_512K) | _PMD_PRESENT);
#else
@@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page)
* So long as we atomically load page table pointers we are safe against teardown,
* we can follow the address down to the the page and take a ref on it.
* This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
*/
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hpage_shift)
@@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
- if (!pte_present(pte) || !pte_read(pte))
- return 0;
- if (write && !pte_write(pte))
+ if (!pte_access_permitted(pte, write))
return 0;
/* hugepages are never "special" */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a07722531b32..f6eb7e8f4c93 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
rc = vmemmap_create_mapping(start, page_size, __pa(p));
if (rc < 0) {
- pr_warning(
- "vmemmap_populate: Unable to create vmemmap mapping: %d\n",
- rc);
+ pr_warn("%s: Unable to create vmemmap mapping: %d\n",
+ __func__, rc);
return -EFAULT;
}
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4362b86ef84c..1281c6eb3a85 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -138,8 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size);
if (rc) {
- pr_warning(
- "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
return -EFAULT;
}
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 59c0766ae4e0..929d9ef7083f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
@@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
subpage_prot_init_new_context(mm);
+ pkey_mm_init(mm);
return index;
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index adb6364f4091..314d19ab9385 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -40,6 +40,7 @@
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
+#include <asm/drmem.h>
static int numa_enabled = 1;
@@ -179,21 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev)
return of_get_property(dev, "ibm,associativity", NULL);
}
-/*
- * Returns the property linux,drconf-usable-memory if
- * it exists (the property exists only in kexec/kdump kernels,
- * added by kexec-tools)
- */
-static const __be32 *of_get_usable_memory(struct device_node *memory)
-{
- const __be32 *prop;
- u32 len;
- prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return NULL;
- return prop;
-}
-
int __node_distance(int a, int b)
{
int i;
@@ -387,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf)
return result;
}
-/*
- * Read the next memblock list entry from the ibm,dynamic-memory property
- * and return the information in the provided of_drconf_cell structure.
- */
-static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
-{
- const __be32 *cp;
-
- drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
-
- cp = *cellp;
- drmem->drc_index = of_read_number(cp, 1);
- drmem->reserved = of_read_number(&cp[1], 1);
- drmem->aa_index = of_read_number(&cp[2], 1);
- drmem->flags = of_read_number(&cp[3], 1);
-
- *cellp = cp + 4;
-}
-
-/*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
- *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries. Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
- */
-static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
-{
- const __be32 *prop;
- u32 len, entries;
-
- prop = of_get_property(memory, "ibm,dynamic-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- entries = of_read_number(prop++, 1);
-
- /* Now that we know the number of entries, revalidate the size
- * of the property read in to ensure we have everything
- */
- if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
- return 0;
-
- *dm = prop;
- return entries;
-}
-
-/*
- * Retrieve and validate the ibm,lmb-size property for drconf memory
- * from the device tree.
- */
-static u64 of_get_lmb_size(struct device_node *memory)
-{
- const __be32 *prop;
- u32 len;
-
- prop = of_get_property(memory, "ibm,lmb-size", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- return read_n_cells(n_mem_size_cells, &prop);
-}
-
struct assoc_arrays {
u32 n_arrays;
u32 array_sz;
@@ -466,19 +389,27 @@ struct assoc_arrays {
* indicating the size of each associativity array, followed by a list
* of N associativity arrays.
*/
-static int of_get_assoc_arrays(struct device_node *memory,
- struct assoc_arrays *aa)
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
{
+ struct device_node *memory;
const __be32 *prop;
u32 len;
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
- if (!prop || len < 2 * sizeof(unsigned int))
+ if (!prop || len < 2 * sizeof(unsigned int)) {
+ of_node_put(memory);
return -1;
+ }
aa->n_arrays = of_read_number(prop++, 1);
aa->array_sz = of_read_number(prop++, 1);
+ of_node_put(memory);
+
/* Now that we know the number of arrays and size of each array,
* revalidate the size of the property read in.
*/
@@ -493,26 +424,30 @@ static int of_get_assoc_arrays(struct device_node *memory,
* This is like of_node_to_nid_single() for memory represented in the
* ibm,dynamic-reconfiguration-memory node.
*/
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
- struct assoc_arrays *aa)
+static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
+ struct assoc_arrays aa = { .arrays = NULL };
int default_nid = 0;
int nid = default_nid;
- int index;
+ int rc, index;
+
+ rc = of_get_assoc_arrays(&aa);
+ if (rc)
+ return default_nid;
- if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
- !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
- drmem->aa_index < aa->n_arrays) {
- index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
- nid = of_read_number(&aa->arrays[index], 1);
+ if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
+ lmb->aa_index < aa.n_arrays) {
+ index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
+ nid = of_read_number(&aa.arrays[index], 1);
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = default_nid;
if (nid > 0) {
- index = drmem->aa_index * aa->array_sz;
+ index = lmb->aa_index * aa.array_sz;
initialize_distance_lookup_table(nid,
- &aa->arrays[index]);
+ &aa.arrays[index]);
}
}
@@ -551,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu)
nid = of_node_to_nid_single(cpu);
out_present:
- if (nid < 0 || !node_online(nid))
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
map_cpu_to_node(lcpu, nid);
@@ -645,67 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init parse_drconf_memory(struct device_node *memory)
+static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm)
{
- const __be32 *uninitialized_var(dm), *usm;
- unsigned int n, rc, ranges, is_kexec_kdump = 0;
- unsigned long lmb_size, base, size, sz;
+ unsigned int ranges, is_kexec_kdump = 0;
+ unsigned long base, size, sz;
int nid;
- struct assoc_arrays aa = { .arrays = NULL };
-
- n = of_get_drconf_memory(memory, &dm);
- if (!n)
- return;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return;
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
+ /*
+ * Skip this block if the reserved bit is set in flags (0x80)
+ * or if the block is not assigned to this partition (0x8)
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
return;
- /* check if this is a kexec/kdump kernel */
- usm = of_get_usable_memory(memory);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- struct of_drconf_cell drmem;
-
- read_drconf_cell(&drmem, &dm);
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ ranges = 1;
- /* skip this block if the reserved bit is set in flags (0x80)
- or if the block is not assigned to this partition (0x8) */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
- continue;
-
- base = drmem.base_addr;
- size = lmb_size;
- ranges = 1;
+ if (is_kexec_kdump) {
+ ranges = read_usm_ranges(usm);
+ if (!ranges) /* there are no (base, size) duple */
+ return;
+ }
+ do {
if (is_kexec_kdump) {
- ranges = read_usm_ranges(&usm);
- if (!ranges) /* there are no (base, size) duple */
- continue;
+ base = read_n_cells(n_mem_addr_cells, usm);
+ size = read_n_cells(n_mem_size_cells, usm);
}
- do {
- if (is_kexec_kdump) {
- base = read_n_cells(n_mem_addr_cells, &usm);
- size = read_n_cells(n_mem_size_cells, &usm);
- }
- nid = of_drconf_to_nid_single(&drmem, &aa);
- fake_numa_create_new_node(
- ((base + size) >> PAGE_SHIFT),
- &nid);
- node_set_online(nid);
- sz = numa_enforce_memory_limit(base, size);
- if (sz)
- memblock_set_node(base, sz,
- &memblock.memory, nid);
- } while (--ranges);
- }
+
+ nid = of_drconf_to_nid_single(lmb);
+ fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+ &nid);
+ node_set_online(nid);
+ sz = numa_enforce_memory_limit(base, size);
+ if (sz)
+ memblock_set_node(base, sz, &memblock.memory, nid);
+ } while (--ranges);
}
static int __init parse_numa_properties(void)
@@ -800,8 +716,10 @@ new_range:
* ibm,dynamic-reconfiguration-memory node.
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory)
- parse_drconf_memory(memory);
+ if (memory) {
+ walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+ of_node_put(memory);
+ }
return 0;
}
@@ -892,6 +810,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
+static void __init find_possible_nodes(void)
+{
+ struct device_node *rtas;
+ u32 numnodes, i;
+
+ if (min_common_depth <= 0)
+ return;
+
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return;
+
+ if (of_property_read_u32_index(rtas,
+ "ibm,max-associativity-domains",
+ min_common_depth, &numnodes))
+ goto out;
+
+ for (i = 0; i < numnodes; i++) {
+ if (!node_possible(i))
+ node_set(i, node_possible_map);
+ }
+
+out:
+ of_node_put(rtas);
+}
+
void __init initmem_init(void)
{
int nid, cpu;
@@ -905,12 +849,15 @@ void __init initmem_init(void)
memblock_dump_all();
/*
- * Reduce the possible NUMA nodes to the online NUMA nodes,
- * since we do not support node hotplug. This ensures that we
- * lower the maximum NUMA node ID to what is actually present.
+ * Modify the set of possible NUMA nodes to reflect information
+ * available about the set of online nodes, and the set of nodes
+ * that we expect to make use of for this platform's affinity
+ * calculations.
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);
+ find_possible_nodes();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -979,43 +926,26 @@ early_param("topology_updates", early_topology_updates);
* memory represented in the device tree by the property
* ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
*/
-static int hot_add_drconf_scn_to_nid(struct device_node *memory,
- unsigned long scn_addr)
+static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
- const __be32 *dm;
- unsigned int drconf_cell_cnt, rc;
+ struct drmem_lmb *lmb;
unsigned long lmb_size;
- struct assoc_arrays aa;
int nid = -1;
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- if (!drconf_cell_cnt)
- return -1;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return -1;
-
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
- return -1;
-
- for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
- struct of_drconf_cell drmem;
-
- read_drconf_cell(&drmem, &dm);
+ lmb_size = drmem_lmb_size();
+ for_each_drmem_lmb(lmb) {
/* skip this block if it is reserved or not assigned to
* this partition */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
- if ((scn_addr < drmem.base_addr)
- || (scn_addr >= (drmem.base_addr + lmb_size)))
+ if ((scn_addr < lmb->base_addr)
+ || (scn_addr >= (lmb->base_addr + lmb_size)))
continue;
- nid = of_drconf_to_nid_single(&drmem, &aa);
+ nid = of_drconf_to_nid_single(lmb);
break;
}
@@ -1080,7 +1010,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+ nid = hot_add_drconf_scn_to_nid(scn_addr);
of_node_put(memory);
} else {
nid = hot_add_node_scn_to_nid(scn_addr);
@@ -1096,11 +1026,7 @@ static u64 hot_add_drconf_memory_max(void)
{
struct device_node *memory = NULL;
struct device_node *dn = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
- const __be32 *dm = NULL;
const __be64 *lrdr = NULL;
- struct of_drconf_cell drmem;
dn = of_find_node_by_path("/rtas");
if (dn) {
@@ -1112,14 +1038,8 @@ static u64 hot_add_drconf_memory_max(void)
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
-
- /* Advance to the last cell, each cell has 6 32 bit integers */
- dm += (drconf_cell_cnt - 1) * 6;
- read_drconf_cell(&drmem, &dm);
of_node_put(memory);
- return drmem.base_addr + lmb_size;
+ return drmem_lmb_memory_max();
}
return 0;
}
@@ -1278,6 +1198,42 @@ static long vphn_get_associativity(unsigned long cpu,
return rc;
}
+int find_and_online_cpu_nid(int cpu)
+{
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ int new_nid;
+
+ /* Use associativity from first thread for all siblings */
+ vphn_get_associativity(cpu, associativity);
+ new_nid = associativity_to_nid(associativity);
+ if (new_nid < 0 || !node_possible(new_nid))
+ new_nid = first_online_node;
+
+ if (NODE_DATA(new_nid) == NULL) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Need to ensure that NODE_DATA is initialized for a node from
+ * available memory (see memblock_alloc_try_nid). If unable to
+ * init the node, then default to nearest node that has memory
+ * installed.
+ */
+ if (try_online_node(new_nid))
+ new_nid = first_online_node;
+#else
+ /*
+ * Default to using the nearest node that has memory installed.
+ * Otherwise, it would be necessary to patch the kernel MM code
+ * to deal with more memoryless-node error conditions.
+ */
+ new_nid = first_online_node;
+#endif
+ }
+
+ pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
+ cpu, new_nid);
+ return new_nid;
+}
+
/*
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
* characteristics change. This function doesn't perform any locking and is
@@ -1345,7 +1301,6 @@ int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
- __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
cpumask_t updated_cpus;
struct device *dev;
int weight, new_nid, i = 0;
@@ -1383,11 +1338,7 @@ int numa_update_cpu_topology(bool cpus_locked)
continue;
}
- /* Use associativity from first thread for all siblings */
- vphn_get_associativity(cpu, associativity);
- new_nid = associativity_to_nid(associativity);
- if (new_nid < 0 || !node_online(new_nid))
- new_nid = first_online_node;
+ new_nid = find_and_online_cpu_nid(cpu);
if (new_nid == numa_cpu_lookup_table[cpu]) {
cpumask_andnot(&cpu_associativity_changes_mask,
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index cfbbee941a76..573a9a2ee455 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
+
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void)
radix_init_amor();
}
radix_init_iamr();
+
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
@@ -622,22 +628,11 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
+
/*
- * We limit the allocation that depend on ppc64_rma_size
- * to first_memblock_size. We also clamp it to 1GB to
- * avoid some funky things such as RTAS bugs.
- *
- * On radix config we really don't have a limitation
- * on real mode access. But keeping it as above works
- * well enough.
- */
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
- /*
- * Finally limit subsequent allocations. We really don't want
- * to limit the memblock allocations to rma_size. FIXME!! should
- * we even limit at all ?
+ * Radix mode is not limited by RMA / VRMA addressing.
*/
- memblock_set_current_limit(first_memblock_base + first_memblock_size);
+ ppc64_rma_size = ULONG_MAX;
}
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index a03ff3d99e0c..9f361ae571e9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte)
return 0;
#else
return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
+ _PAGE_PRIVILEGED)) ==
(_PAGE_PRESENT | _PAGE_USER);
#endif
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index f6c7f54c0515..d35d9ad3c1cd 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
flags &= ~(_PAGE_USER | _PAGE_EXEC);
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
-#endif
+ flags |= _PAGE_PRIVILEGED;
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 813ea22c3e00..c9a623c2d8a2 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
/*
* Force kernel mapping.
*/
-#if defined(CONFIG_PPC_BOOK3S_64)
- flags |= _PAGE_PRIVILEGED;
-#else
flags &= ~_PAGE_USER;
-#endif
-
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
-#endif
+ flags |= _PAGE_PRIVILEGED;
if (ppc_md.ioremap)
return ppc_md.ioremap(addr, size, flags, caller);
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
new file mode 100644
index 000000000000..ba71c5481f42
--- /dev/null
+++ b/arch/powerpc/mm/pkeys.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/setup.h>
+#include <linux/pkeys.h>
+#include <linux/of_device.h>
+
+DEFINE_STATIC_KEY_TRUE(pkey_disabled);
+bool pkey_execute_disable_supported;
+int pkeys_total; /* Total pkeys as per device tree */
+bool pkeys_devtree_defined; /* pkey property exported by device tree */
+u32 initial_allocation_mask; /* Bits set for reserved keys */
+u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */
+u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64)*8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static void scan_pkey_feature(void)
+{
+ u32 vals[2];
+ struct device_node *cpu;
+
+ cpu = of_find_node_by_type(NULL, "cpu");
+ if (!cpu)
+ return;
+
+ if (of_property_read_u32_array(cpu,
+ "ibm,processor-storage-keys", vals, 2))
+ return;
+
+ /*
+ * Since any pkey can be used for data or execute, we will just treat
+ * all keys as equal and track them as one entity.
+ */
+ pkeys_total = be32_to_cpu(vals[0]);
+ pkeys_devtree_defined = true;
+}
+
+static inline bool pkey_mmu_enabled(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ return pkeys_total;
+ else
+ return cpu_has_feature(CPU_FTR_PKEY);
+}
+
+int pkey_initialize(void)
+{
+ int os_reserved, i;
+
+ /*
+ * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+ * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+ * Ensure that the bits a distinct.
+ */
+ BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+ (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ /*
+ * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+ * in the vmaflag. Make sure that is really the case.
+ */
+ BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+ __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+ != (sizeof(u64) * BITS_PER_BYTE));
+
+ /* scan the device tree for pkey feature */
+ scan_pkey_feature();
+
+ /*
+ * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
+ * tree. We make this exception since skiboot forgot to expose this
+ * property on power8.
+ */
+ if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
+ cpu_has_feature(CPU_FTRS_POWER8))
+ pkeys_total = 32;
+
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+
+ if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
+ static_branch_enable(&pkey_disabled);
+ else
+ static_branch_disable(&pkey_disabled);
+
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * The device tree cannot be relied to indicate support for
+ * execute_disable support. Instead we use a PVR check.
+ */
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+ pkey_execute_disable_supported = false;
+ else
+ pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+ /*
+ * The OS can manage only 8 pkeys due to its inability to represent them
+ * in the Linux 4K PTE.
+ */
+ os_reserved = pkeys_total - 8;
+#else
+ os_reserved = 0;
+#endif
+ /*
+ * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0 is the default key, which allows read/write/execute.
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ initial_allocation_mask = ~0x0;
+
+ /* register mask is in BE format */
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
+
+ for (i = 2; i < (pkeys_total - os_reserved); i++) {
+ initial_allocation_mask &= ~(0x1 << i);
+ pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
+ pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
+ }
+ return 0;
+}
+
+arch_initcall(pkey_initialize);
+
+void pkey_mm_init(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+ mm_pkey_allocation_map(mm) = initial_allocation_mask;
+ /* -1 means unallocated or invalid */
+ mm->context.execute_only_pkey = -1;
+}
+
+static inline u64 read_amr(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+static inline void write_amr(u64 value)
+{
+ mtspr(SPRN_AMR, value);
+}
+
+static inline u64 read_iamr(void)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return 0x0UL;
+
+ return mfspr(SPRN_IAMR);
+}
+
+static inline void write_iamr(u64 value)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return;
+
+ mtspr(SPRN_IAMR, value);
+}
+
+static inline u64 read_uamor(void)
+{
+ return mfspr(SPRN_UAMOR);
+}
+
+static inline void write_uamor(u64 value)
+{
+ mtspr(SPRN_UAMOR, value);
+}
+
+static bool is_pkey_enabled(int pkey)
+{
+ u64 uamor = read_uamor();
+ u64 pkey_bits = 0x3ul << pkeyshift(pkey);
+ u64 uamor_pkey_bits = (uamor & pkey_bits);
+
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set or
+ * reset.
+ */
+ WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
+ return !!(uamor_pkey_bits);
+}
+
+static inline void init_amr(int pkey, u8 init_bits)
+{
+ u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
+ u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
+
+ write_amr(old_amr | new_amr_bits);
+}
+
+static inline void init_iamr(int pkey, u8 init_bits)
+{
+ u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
+ u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
+
+ write_iamr(old_iamr | new_iamr_bits);
+}
+
+static void pkey_status_change(int pkey, bool enable)
+{
+ u64 old_uamor;
+
+ /* Reset the AMR and IAMR bits for this key */
+ init_amr(pkey, 0x0);
+ init_iamr(pkey, 0x0);
+
+ /* Enable/disable key */
+ old_uamor = read_uamor();
+ if (enable)
+ old_uamor |= (0x3ul << pkeyshift(pkey));
+ else
+ old_uamor &= ~(0x3ul << pkeyshift(pkey));
+ write_uamor(old_uamor);
+}
+
+void __arch_activate_pkey(int pkey)
+{
+ pkey_status_change(pkey, true);
+}
+
+void __arch_deactivate_pkey(int pkey)
+{
+ pkey_status_change(pkey, false);
+}
+
+/*
+ * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
+ * specified in @init_val.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ u64 new_amr_bits = 0x0ul;
+ u64 new_iamr_bits = 0x0ul;
+
+ if (!is_pkey_enabled(pkey))
+ return -EINVAL;
+
+ if (init_val & PKEY_DISABLE_EXECUTE) {
+ if (!pkey_execute_disable_supported)
+ return -EINVAL;
+ new_iamr_bits |= IAMR_EX_BIT;
+ }
+ init_iamr(pkey, new_iamr_bits);
+
+ /* Set the bits we need in AMR: */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+ else if (init_val & PKEY_DISABLE_WRITE)
+ new_amr_bits |= AMR_WR_BIT;
+
+ init_amr(pkey, new_amr_bits);
+ return 0;
+}
+
+void thread_pkey_regs_save(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Skip saving registers if @thread hasn't used any keys yet.
+ */
+ thread->amr = read_amr();
+ thread->iamr = read_iamr();
+ thread->uamor = read_uamor();
+}
+
+void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
+ */
+ if (old_thread->amr != new_thread->amr)
+ write_amr(new_thread->amr);
+ if (old_thread->iamr != new_thread->iamr)
+ write_iamr(new_thread->iamr);
+ if (old_thread->uamor != new_thread->uamor)
+ write_uamor(new_thread->uamor);
+}
+
+void thread_pkey_regs_init(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ write_amr(read_amr() & pkey_amr_uamor_mask);
+ write_iamr(read_iamr() & pkey_iamr_mask);
+ write_uamor(read_uamor() & pkey_amr_uamor_mask);
+}
+
+static inline bool pkey_allows_readwrite(int pkey)
+{
+ int pkey_shift = pkeyshift(pkey);
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
+}
+
+int __execute_only_pkey(struct mm_struct *mm)
+{
+ bool need_to_set_mm_pkey = false;
+ int execute_only_pkey = mm->context.execute_only_pkey;
+ int ret;
+
+ /* Do we need to assign a pkey for mm's execute-only maps? */
+ if (execute_only_pkey == -1) {
+ /* Go allocate one to use, which might fail */
+ execute_only_pkey = mm_pkey_alloc(mm);
+ if (execute_only_pkey < 0)
+ return -1;
+ need_to_set_mm_pkey = true;
+ }
+
+ /*
+ * We do not want to go through the relatively costly dance to set AMR
+ * if we do not need to. Check it first and assume that if the
+ * execute-only pkey is readwrite-disabled than we do not have to set it
+ * ourselves.
+ */
+ if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
+ return execute_only_pkey;
+
+ /*
+ * Set up AMR so that it denies access for everything other than
+ * execution.
+ */
+ ret = __arch_set_user_pkey_access(current, execute_only_pkey,
+ PKEY_DISABLE_ACCESS |
+ PKEY_DISABLE_WRITE);
+ /*
+ * If the AMR-set operation failed somehow, just return 0 and
+ * effectively disable execute-only support.
+ */
+ if (ret) {
+ mm_pkey_free(mm, execute_only_pkey);
+ return -1;
+ }
+
+ /* We got one, store it and use it from here on out */
+ if (need_to_set_mm_pkey)
+ mm->context.execute_only_pkey = execute_only_pkey;
+ return execute_only_pkey;
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
+{
+ /* Do this check first since the vm_flags should be hot */
+ if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
+ return false;
+
+ return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
+}
+
+/*
+ * This should only be called for *plain* mprotect calls.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+ int pkey)
+{
+ /*
+ * If the currently associated pkey is execute-only, but the requested
+ * protection requires read or write, move it back to the default pkey.
+ */
+ if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
+ return 0;
+
+ /*
+ * The requested protection is execute-only. Hence let's use an
+ * execute-only pkey.
+ */
+ if (prot == PROT_EXEC) {
+ pkey = execute_only_pkey(vma->vm_mm);
+ if (pkey > 0)
+ return pkey;
+ }
+
+ /* Nothing to override. */
+ return vma_pkey(vma);
+}
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute)
+{
+ int pkey_shift;
+ u64 amr;
+
+ if (!pkey)
+ return true;
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ pkey_shift = pkeyshift(pkey);
+ if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
+ return true;
+
+ amr = read_amr(); /* Delay reading amr until absolutely needed */
+ return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) ||
+ (write && !(amr & (AMR_WR_BIT << pkey_shift))));
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+
+ return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+}
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+ if (!current->mm)
+ return true;
+
+ /* if it is not our ->mm, it has to be foreign */
+ if (current->mm != vma->vm_mm)
+ return true;
+
+ return false;
+}
+
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+ /*
+ * Do not enforce our key-permissions on a foreign vma.
+ */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 781532d7bc4d..f14a07c2fb90 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
unsigned long next, limit;
int err;
+ if (radix_enabled())
+ return -ENOENT;
+
/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
addr >= mm->task_size || len >= mm->task_size ||
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 884f4b705b57..71d1b19ad1c0 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -23,6 +23,72 @@
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
+/*
+ * tlbiel instruction for radix, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+ unsigned int r = 1; /* radix format */
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+ : "memory");
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the first set of the TLB, and the entire Page Walk Cache
+ * and partition table entries. Then flush the remaining sets of the
+ * TLB.
+ */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+
+ /* Do the same for process scoped entries. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+
+ asm volatile("ptesync": : :"memory");
+}
+
+void radix__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+ else
+ WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
+
+ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
static inline void __tlbiel_pid(unsigned long pid, int set,
unsigned long ric)
{
@@ -600,14 +666,12 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entires by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index bfc4a0869609..15fe5f0c8665 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- flush_tlb_mm(vma->vm_mm);
+ if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
+ flush_tlb_page(vma, start);
+ else
+ flush_tlb_mm(vma->vm_mm);
}
EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 3c39f05f0af3..6c0020d1c561 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
+#include <asm/code-patching.h>
#define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
@@ -30,8 +31,13 @@
extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter;
+extern unsigned int itlb_miss_perf, dtlb_miss_perf;
+extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
+extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
static atomic_t insn_ctr_ref;
+static atomic_t itlb_miss_ref;
+static atomic_t dtlb_miss_ref;
static s64 get_insn_ctr(void)
{
@@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
val = get_insn_ctr();
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_inc_return(&itlb_miss_ref) == 1) {
+ unsigned long target = (unsigned long)&itlb_miss_perf;
+
+ patch_branch(&itlb_miss_exit_1, target, 0);
+#ifndef CONFIG_PIN_TLB_TEXT
+ patch_branch(&itlb_miss_exit_2, target, 0);
+#endif
+ }
val = itlb_miss_counter;
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_inc_return(&dtlb_miss_ref) == 1) {
+ unsigned long target = (unsigned long)&dtlb_miss_perf;
+
+ patch_branch(&dtlb_miss_exit_1, target, 0);
+ patch_branch(&dtlb_miss_exit_2, target, 0);
+ patch_branch(&dtlb_miss_exit_3, target, 0);
+ }
val = dtlb_miss_counter;
break;
}
@@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
+ /* mfspr r10, SPRN_SPRG_SCRATCH0 */
+ unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
+ __PPC_SPR(SPRN_SPRG_SCRATCH0);
+
mpc8xx_pmu_read(event);
- if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
- return;
/* If it was the last user, stop counting to avoid useles overhead */
- if (atomic_dec_return(&insn_ctr_ref) == 0)
- mtspr(SPRN_ICTRL, 7);
+ switch (event_type(event)) {
+ case PERF_8xx_ID_CPU_CYCLES:
+ break;
+ case PERF_8xx_ID_HW_INSTRUCTIONS:
+ if (atomic_dec_return(&insn_ctr_ref) == 0)
+ mtspr(SPRN_ICTRL, 7);
+ break;
+ case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_dec_return(&itlb_miss_ref) == 0) {
+ patch_instruction(&itlb_miss_exit_1, insn);
+#ifndef CONFIG_PIN_TLB_TEXT
+ patch_instruction(&itlb_miss_exit_2, insn);
+#endif
+ }
+ break;
+ case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_dec_return(&dtlb_miss_ref) == 0) {
+ patch_instruction(&dtlb_miss_exit_1, insn);
+ patch_instruction(&dtlb_miss_exit_2, insn);
+ patch_instruction(&dtlb_miss_exit_3, insn);
+ }
+ break;
+ }
}
static struct pmu mpc8xx_pmu = {
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 225c9c86d7c0..57ebc655d2ac 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
-obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o
+obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index fce545774d50..f89bbd54ecec 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
*/
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
/*
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index be4e7f84f70a..d7532e7b9ab5 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
static DEFINE_PER_CPU(u64 *, thread_imc_mem);
-static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st
return &attr->attr.attr;
}
-struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
- const char *unit, const char *prefix, u32 base)
+static int imc_parse_event(struct device_node *np, const char *scale,
+ const char *unit, const char *prefix,
+ u32 base, struct imc_events *event)
{
- struct imc_events *event;
const char *s;
u32 reg;
- event = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
- if (!event)
- return NULL;
-
if (of_property_read_u32(np, "reg", &reg))
goto error;
/* Add the base_reg value to the "reg" */
@@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
goto error;
}
- return event;
+ return 0;
error:
kfree(event->unit);
kfree(event->scale);
kfree(event->name);
- kfree(event);
+ return -EINVAL;
+}
+
+/*
+ * imc_free_events: Function to cleanup the events list, having
+ * "nr_entries".
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+ int i;
- return NULL;
+ /* Nothing to clean, return */
+ if (!events)
+ return;
+ for (i = 0; i < nr_entries; i++) {
+ kfree(events[i].unit);
+ kfree(events[i].scale);
+ kfree(events[i].name);
+ }
+
+ kfree(events);
}
/*
@@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
struct attribute_group *attr_group;
struct attribute **attrs, *dev_str;
struct device_node *np, *pmu_events;
- struct imc_events *ev;
u32 handle, base_reg;
- int i=0, j=0, ct;
+ int i = 0, j = 0, ct, ret;
const char *prefix, *g_scale, *g_unit;
const char *ev_val_str, *ev_scale_str, *ev_unit_str;
@@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct = 0;
/* Parse the events and update the struct */
for_each_child_of_node(pmu_events, np) {
- ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
- if (ev)
- pmu->events[ct++] = ev;
+ ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+ if (!ret)
+ ct++;
}
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
- if (!attr_group)
+ if (!attr_group) {
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
+ }
/*
* Allocate memory for attributes.
@@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
if (!attrs) {
kfree(attr_group);
- kfree(pmu->events);
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
}
attr_group->name = "events";
attr_group->attrs = attrs;
do {
- ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value);
- dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str);
+ ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
attrs[j++] = dev_str;
- if (pmu->events[i]->scale) {
- ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale);
+ if (pmu->events[i].scale) {
+ ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
attrs[j++] = dev_str;
}
- if (pmu->events[i]->unit) {
- ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit);
+ if (pmu->events[i].unit) {
+ ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
@@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
/* Save the event attribute */
pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
- kfree(pmu->events);
return 0;
}
@@ -611,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu)
static int ppc_core_imc_cpu_offline(unsigned int cpu)
{
- unsigned int ncpu, core_id;
+ unsigned int core_id;
+ int ncpu;
struct imc_pmu_ref *ref;
/*
@@ -1171,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+ if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+ kfree(pmu_ptr);
+}
+
/*
* Common function to unregister cpu hotplug callback and
* free the memory.
@@ -1203,13 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
-
- /* Only free the attr_groups which are dynamically allocated */
- if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
- kfree(pmu_ptr);
- return;
}
@@ -1258,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
GFP_KERNEL);
- if (!core_imc_refc)
+ if (!core_imc_refc) {
+ kfree(pmu_ptr->mem_info);
return -ENOMEM;
+ }
core_imc_pmu = pmu_ptr;
break;
@@ -1272,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_mem_size = pmu_ptr->counter_mem_size;
for_each_online_cpu(cpu) {
res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
- if (res)
+ if (res) {
+ cleanup_all_thread_imc_memory();
return res;
+ }
}
- thread_imc_pmu = pmu_ptr;
break;
default:
return -EINVAL;
@@ -1300,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
int ret;
ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
- if (ret)
- goto err_free;
+ if (ret) {
+ imc_common_mem_free(pmu_ptr);
+ return ret;
+ }
switch (pmu_ptr->domain) {
case IMC_DOMAIN_NEST:
@@ -1368,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
return 0;
err_free:
+ imc_common_mem_free(pmu_ptr);
imc_common_cpuhp_mem_free(pmu_ptr);
return ret;
}
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
index 92e98048404f..04f0c73a9b4f 100644
--- a/arch/powerpc/platforms/44x/fsp2.c
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -27,6 +27,17 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR "ibm,bus-error-irq"
+#define FSP2_CMU_ERR "ibm,cmu-error-irq"
+#define FSP2_CONF_ERR "ibm,conf-error-irq"
+#define FSP2_OPBD_ERR "ibm,opbd-error-irq"
+#define FSP2_MCUE "ibm,mc-ue-irq"
+#define FSP2_RST_WRN "ibm,reset-warning-irq"
static __initdata struct of_device_id fsp2_of_bus[] = {
{ .compatible = "ibm,plb4", },
@@ -35,6 +46,194 @@ static __initdata struct of_device_id fsp2_of_bus[] = {
{},
};
+static void l2regs(void)
+{
+ pr_err("L2 Controller:\n");
+ pr_err("MCK: 0x%08x\n", mfl2(L2MCK));
+ pr_err("INT: 0x%08x\n", mfl2(L2INT));
+ pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+ pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+ pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+ pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+ pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+ pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT));
+ pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+ pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+ pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+ pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+ pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT));
+ pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0));
+ pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1));
+ pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2));
+ pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3));
+ pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4));
+ pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5));
+}
+
+static void show_plbopb_regs(u32 base, int num)
+{
+ pr_err("\nPLBOPB Bridge %d:\n", num);
+ pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+ pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+ pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+ pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+ pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+static irqreturn_t bus_err_handler(int irq, void *data)
+{
+ pr_err("Bus Error\n");
+
+ l2regs();
+
+ pr_err("\nPLB6 Controller:\n");
+ pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+ pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+ pr_err("\nPLB6-to-PLB4 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+ pr_err("\nPLB4-to-PLB6 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+ pr_err("\nPLB6-to-MCIF Bridge:\n");
+ pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+ pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+ pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+ pr_err("\nPLB4 Arbiter:\n");
+ pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+ pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+ pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+
+ show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+ show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+ show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+ show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+ pr_err("\nPLB4-to-AHB Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+ pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+ pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+ pr_err("\nAHB-to-PLB4 Bridge:\n");
+ pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+ pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+ panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data) {
+ pr_err("CMU Error\n");
+ pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+ panic("CMU Error\n");
+}
+
+static irqreturn_t conf_err_handler(int irq, void *data) {
+ pr_err("Configuration Logic Error\n");
+ pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+ pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+ pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+ panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data) {
+ panic("OPBD Error\n");
+}
+
+static irqreturn_t mcue_handler(int irq, void *data) {
+ pr_err("DDR: Uncorrectable Error\n");
+ pr_err("MCSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+ pr_err("MCOPT1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+ pr_err("MCOPT2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+ pr_err("PHYSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+ pr_err("CFGR0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+ pr_err("CFGR1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+ pr_err("CFGR2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+ pr_err("CFGR3: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+ pr_err("SCRUB_CNTL: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+ pr_err("ECCERR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+ pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+ pr_err("ECCERR_CNT_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+ pr_err("ECC_CHECK_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+ pr_err("MCER0: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+ pr_err("MCER1: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+ pr_err("BESR: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BEARL: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARL));
+ pr_err("BEARH: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARH));
+ panic("DDR: Uncorrectable Error\n");
+}
+
+static irqreturn_t rst_wrn_handler(int irq, void *data) {
+ u32 crcs = mfcmu(CMUN_CRCS);
+ switch (crcs & CRCS_STAT_MASK) {
+ case CRCS_STAT_CHIP_RST_B:
+ panic("Received chassis-initiated reset request");
+ default:
+ panic("Unknown external reset: CRCS=0x%x", crcs);
+ }
+}
+
+static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+ struct device_node *np;
+ unsigned int irq;
+ int32_t rc;
+
+ for_each_compatible_node(np, NULL, compat) {
+ irq = irq_of_parse_and_map(np, 0);
+ if (irq == NO_IRQ) {
+ pr_err("device tree node %s is missing a interrupt",
+ np->name);
+ return;
+ }
+
+ rc = request_irq(irq, errirq_handler, 0, np->name, np);
+ if (rc) {
+ pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d",
+ np->full_name, rc);
+ return;
+ }
+ }
+}
+
+static void critical_irq_setup(void)
+{
+ node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+ node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+ node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+ node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+ node_irq_request(FSP2_MCUE, mcue_handler);
+ node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
static int __init fsp2_device_probe(void)
{
of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
@@ -44,18 +243,76 @@ machine_device_initcall(fsp2, fsp2_device_probe);
static int __init fsp2_probe(void)
{
+ u32 val;
unsigned long root = of_get_flat_dt_root();
if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
return 0;
+
+ /* Clear BC_ERR and mask snoopable request plb errors. */
+ val = mfdcr(DCRN_PLB6_CR0);
+ val |= 0x20000000;
+ mtdcr(DCRN_PLB6_BASE, val);
+ mtdcr(DCRN_PLB6_HD, 0xffff0000);
+ mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+ /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS
+ * sleep config bit. As a consequence, TVSENSE will provide erratic
+ * sensor values, which may result in spurious (parity) errors
+ * recorded in the CMU FIR and leading to erroneous interrupt requests
+ * once the CMU interrupt is unmasked.
+ */
+
+ /* 1. set TVS1[UNDOZE] */
+ val = mfcmu(CMUN_TVS1);
+ val |= 0x4;
+ mtcmu(CMUN_TVS1, val);
+
+ /* 2. clear FIR[TVS] and FIR[TVSPAR] */
+ val = mfcmu(CMUN_FIR0);
+ val |= 0x30000000;
+ mtcmu(CMUN_FIR0, val);
+
+ /* L2 machine checks */
+ mtl2(L2PLBMCKEN0, 0xffffffff);
+ mtl2(L2PLBMCKEN1, 0x0000ffff);
+ mtl2(L2ARRMCKEN0, 0xffffffff);
+ mtl2(L2ARRMCKEN1, 0xffffffff);
+ mtl2(L2ARRMCKEN2, 0xfffff000);
+ mtl2(L2CPUMCKEN, 0xffffffff);
+ mtl2(L2RACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN1, 0xffffffff);
+ mtl2(L2WACMCKEN2, 0xffffffff);
+ mtl2(L2WDFMCKEN, 0xffffffff);
+
+ /* L2 interrupts */
+ mtl2(L2PLBINTEN1, 0xffff0000);
+
+ /*
+ * At a global level, enable all L2 machine checks and interrupts
+ * reported by the L2 subsystems, except for the external machine check
+ * input (UIC0.1).
+ */
+ mtl2(L2MCKEN, 0x000007ff);
+ mtl2(L2INTEN, 0x000004ff);
+
+ /* Enable FSP-2 configuration logic parity errors */
+ mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
return 1;
}
+static void __init fsp2_irq_init(void)
+{
+ uic_init_tree();
+ critical_irq_setup();
+}
+
define_machine(fsp2) {
.name = "FSP-2",
.probe = fsp2_probe,
.progress = udbg_progress,
- .init_IRQ = uic_init_tree,
+ .init_IRQ = fsp2_irq_init,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
.calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644
index 000000000000..9e1d52754c8b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.h
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+#define DCRN_CMU_ADDR 0x00C /* Chip management unic addr */
+#define DCRN_CMU_DATA 0x00D /* Chip management unic data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/
+#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/
+#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE 0x020
+#define DCRN_PLB4OPB1_BASE 0x030
+#define DCRN_PLB4OPB2_BASE 0x040
+#define DCRN_PLB4OPB3_BASE 0x050
+
+#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR 0x2 /* Error Address Register */
+#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */
+#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE 0x400
+#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE 0x11111300
+#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE 0x11111320
+#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE 0x11111350
+#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE 0x11111380
+#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE 0x11111400
+#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI 0x11111100
+#define DCRN_L2CDCRDI 0x11111104
+/* L2 indirect addresses */
+#define L2MCK 0x120
+#define L2MCKEN 0x130
+#define L2INT 0x150
+#define L2INTEN 0x160
+#define L2LOG0 0x180
+#define L2LOG1 0x184
+#define L2LOG2 0x188
+#define L2LOG3 0x18C
+#define L2LOG4 0x190
+#define L2LOG5 0x194
+#define L2PLBSTAT0 0x300
+#define L2PLBSTAT1 0x304
+#define L2PLBMCKEN0 0x330
+#define L2PLBMCKEN1 0x334
+#define L2PLBINTEN0 0x360
+#define L2PLBINTEN1 0x364
+#define L2ARRSTAT0 0x500
+#define L2ARRSTAT1 0x504
+#define L2ARRSTAT2 0x508
+#define L2ARRMCKEN0 0x530
+#define L2ARRMCKEN1 0x534
+#define L2ARRMCKEN2 0x538
+#define L2ARRINTEN0 0x560
+#define L2ARRINTEN1 0x564
+#define L2ARRINTEN2 0x568
+#define L2CPUSTAT 0x700
+#define L2CPUMCKEN 0x730
+#define L2CPUINTEN 0x760
+#define L2RACSTAT0 0x900
+#define L2RACMCKEN0 0x930
+#define L2RACINTEN0 0x960
+#define L2WACSTAT0 0xD00
+#define L2WACSTAT1 0xD04
+#define L2WACSTAT2 0xD08
+#define L2WACMCKEN0 0xD30
+#define L2WACMCKEN1 0xD34
+#define L2WACMCKEN2 0xD38
+#define L2WACINTEN0 0xD60
+#define L2WACINTEN1 0xD64
+#define L2WACINTEN2 0xD68
+#define L2WDFSTAT 0xF00
+#define L2WDFMCKEN 0xF30
+#define L2WDFINTEN 0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE 0x11120000
+#define DCRN_DDR34_MCSTAT 0x10
+#define DCRN_DDR34_MCOPT1 0x20
+#define DCRN_DDR34_MCOPT2 0x21
+#define DCRN_DDR34_PHYSTAT 0x32
+#define DCRN_DDR34_CFGR0 0x40
+#define DCRN_DDR34_CFGR1 0x41
+#define DCRN_DDR34_CFGR2 0x42
+#define DCRN_DDR34_CFGR3 0x43
+#define DCRN_DDR34_SCRUB_CNTL 0xAA
+#define DCRN_DDR34_SCRUB_INT 0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR 0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR 0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7
+#define DCRN_DDR34_ECCERR_PORT0 0xF0
+#define DCRN_DDR34_ECCERR_PORT1 0xF2
+#define DCRN_DDR34_ECCERR_PORT2 0xF4
+#define DCRN_DDR34_ECCERR_PORT3 0xF6
+#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB
+
+#define DDR34_SCRUB_CNTL_STOP 0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB 0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE 0x11111800
+#define DCRN_CW_MCER0 0x00
+#define DCRN_CW_MCER1 0x01
+#define DCRN_CW_MCER_AND0 0x02
+#define DCRN_CW_MCER_AND1 0x03
+#define DCRN_CW_MCER_OR0 0x04
+#define DCRN_CW_MCER_OR1 0x05
+#define DCRN_CW_MCER_MASK0 0x06
+#define DCRN_CW_MCER_MASK1 0x07
+#define DCRN_CW_MCER_MASK_AND0 0x08
+#define DCRN_CW_MCER_MASK_AND1 0x09
+#define DCRN_CW_MCER_MASK_OR0 0x0A
+#define DCRN_CW_MCER_MASK_OR1 0x0B
+#define DCRN_CW_MCER_ACTION0 0x0C
+#define DCRN_CW_MCER_ACTION1 0x0D
+#define DCRN_CW_MCER_WOF0 0x0E
+#define DCRN_CW_MCER_WOF1 0x0F
+#define DCRN_CW_LFIR 0x10
+#define DCRN_CW_LFIR_AND 0x11
+#define DCRN_CW_LFIR_OR 0x12
+#define DCRN_CW_LFIR_MASK 0x13
+#define DCRN_CW_LFIR_MASK_AND 0x14
+#define DCRN_CW_LFIR_MASK_OR 0x15
+
+#define CW_MCER0_MEM_CE 0x00020000
+/* CMU addresses */
+#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0 0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C 0x32 /* Code Load Status (Clear */
+#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */
+#define CMUN_PCD0 0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1 0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0 0x39 /* Reset Timer */
+#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB 0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK 0xF0000000
+#define CRCS_STAT_POR 0x10000000
+#define CRCS_STAT_PHR 0x20000000
+#define CRCS_STAT_PCIE 0x30000000
+#define CRCS_STAT_CRCS_SYS 0x40000000
+#define CRCS_STAT_DBCR_SYS 0x50000000
+#define CRCS_STAT_HOST_SYS 0x60000000
+#define CRCS_STAT_CHIP_RST_B 0x70000000
+#define CRCS_STAT_CRCS_CHIP 0x80000000
+#define CRCS_STAT_DBCR_CHIP 0x90000000
+#define CRCS_STAT_HOST_CHIP 0xA0000000
+#define CRCS_STAT_PSI_CHIP 0xB0000000
+#define CRCS_STAT_CRCS_CORE 0xC0000000
+#define CRCS_STAT_DBCR_CORE 0xD0000000
+#define CRCS_STAT_HOST_CORE 0xE0000000
+#define CRCS_STAT_PCIE_HOT 0xF0000000
+#define CRCS_STAT_SELF_CORE 0x40000000
+#define CRCS_STAT_SELF_CHIP 0x50000000
+#define CRCS_WATCHE 0x08000000
+#define CRCS_CORE 0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP 0x02000000 /* Chip Reset */
+#define CRCS_SYS 0x01000000 /* System Reset */
+#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK 0x00000002 /* PLL Locked */
+
+#define mtcmu(reg, data) \
+do { \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ mtdcr(DCRN_CMU_DATA, data); \
+} while (0)
+
+#define mfcmu(reg)\
+ ({u32 data; \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ data = mfdcr(DCRN_CMU_DATA); \
+ data; })
+
+#define mtl2(reg, data) \
+do { \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ mtdcr(DCRN_L2CDCRDI, data); \
+} while (0)
+
+#define mfl2(reg) \
+ ({u32 data; \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ data = mfdcr(DCRN_L2CDCRDI); \
+ data; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP2_DCR_H_ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index f99e79ee060e..48abb4cb304c 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
if (fp)
return *fp;
- pr_warning("no %s property in %pOF node, defaulting to %d\n",
- prop_name, np, DEFAULT_FIFO_SIZE);
+ pr_warn("no %s property in %pOF node, defaulting to %d\n",
+ prop_name, np, DEFAULT_FIFO_SIZE);
return DEFAULT_FIFO_SIZE;
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 9e974b1e1697..17cf249b18ee 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -90,7 +90,7 @@ struct mpc52xx_gpt_priv {
struct list_head list; /* List of all GPT devices */
struct device *dev;
struct mpc52xx_gpt __iomem *regs;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct irq_domain *irqhost;
u32 ipb_freq;
u8 wdt_mode;
@@ -141,9 +141,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_mask(struct irq_data *d)
@@ -151,9 +151,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_ack(struct irq_data *d)
@@ -171,14 +171,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK;
if (flow_type & IRQF_TRIGGER_RISING)
reg |= MPC52xx_GPT_MODE_ICT_RISING;
if (flow_type & IRQF_TRIGGER_FALLING)
reg |= MPC52xx_GPT_MODE_ICT_FALLING;
out_be32(&gpt->regs->mode, reg);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -264,11 +264,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
/* If the GPT is currently disabled, then change it to be in Input
* Capture mode. If the mode is non-zero, then the pin could be
* already in use for something. */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
mode = in_be32(&gpt->regs->mode);
if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
}
@@ -295,9 +295,9 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
@@ -307,9 +307,9 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -436,16 +436,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
}
/* Set and enable the timer, reject an attempt to use a wdt as gpt */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if (as_wdt)
gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
out_be32(&gpt->regs->count, prescale << 16 | clocks);
clrsetbits_be32(&gpt->regs->mode, clear, set);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -476,14 +476,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
unsigned long flags;
/* reject the operation if the timer is used as watchdog (gpt 0 only) */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
@@ -500,9 +500,9 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
u64 prescale;
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
period = in_be32(&gpt->regs->count);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
prescale = period >> 16;
period &= 0xffff;
@@ -532,9 +532,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
{
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
}
/* wdt misc device api */
@@ -638,11 +638,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
clrbits32(&gpt_wdt->regs->mode,
MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
#endif
clear_bit(0, &wdt_is_active);
return 0;
@@ -723,7 +723,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
if (!gpt)
return -ENOMEM;
- spin_lock_init(&gpt->lock);
+ raw_spin_lock_init(&gpt->lock);
gpt->dev = &ofdev->dev;
gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
gpt->regs = of_iomap(ofdev->dev.of_node, 0);
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 96bb55ca61d3..d2ef39f0edc8 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -84,7 +84,7 @@ static ssize_t show_status(struct device *d,
return sprintf(buf, "%02x\n", ret);
}
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
static void mcu_power_off(void)
{
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index bb7b25acf26f..74c154e67c8b 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -75,7 +75,7 @@ static void __init mpc832x_sys_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
}
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index a4539c5accb0..438986593873 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -204,7 +204,7 @@ static void __init mpc832x_rdb_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
}
#endif /* CONFIG_QUICC_ENGINE */
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 4fc3051c2b2e..fd44dd03e1f3 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -83,7 +83,7 @@ static void __init mpc836x_mds_setup_arch(void)
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
#ifdef CONFIG_QE_USB
/* Must fixup Par IO before QE GPIO chips are registered. */
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 82f8490b5aa7..38d4ba9f37b5 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -252,8 +252,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
/* type is configurable */
if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
- pr_warning("FPGA PIC: invalid irq type, "
- "setting default active low\n");
+ pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
*out_flags = IRQ_TYPE_LEVEL_LOW;
} else {
*out_flags = intspec[1];
@@ -267,7 +266,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
if (intspec[2] <= 2)
fpga_irq->irq_line = intspec[2];
else
- pr_warning("FPGA PIC: invalid irq routing\n");
+ pr_warn("FPGA PIC: invalid irq routing\n");
return 0;
}
@@ -293,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic)
for (i = 0; i < 3; i++) {
socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
if (!socrates_fpga_irqs[i]) {
- pr_warning("FPGA PIC: can't get irq%d.\n", i);
+ pr_warn("FPGA PIC: can't get irq%d\n", i);
continue;
}
irq_set_chained_handler(socrates_fpga_irqs[i],
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index a0e989ed4b6f..17c6cd3d02e6 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -101,7 +101,7 @@ static int __init mpc86xx_hpcn_probe(void)
/* Be nice and don't give silent boot death. Delete this in 2.6.27 */
if (of_machine_is_compatible("mpc86xx")) {
- pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n");
+ pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n");
return 1;
}
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index e2089d3de00c..d408162d5af4 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -116,18 +116,6 @@ config 8xx_GPIO
If in doubt, say Y here.
-config 8xx_CPU6
- bool "CPU6 Silicon Errata (860 Pre Rev. C)"
- help
- MPC860 CPUs, prior to Rev C have some bugs in the silicon, which
- require workarounds for Linux (and most other OSes to work). If you
- get a BUG() very early in boot, this might fix the problem. For
- more details read the document entitled "MPC860 Family Device Errata
- Reference" on Freescale's website. This option also incurs a
- performance hit.
-
- If in doubt, say N here.
-
config 8xx_CPU15
bool "CPU15 Silicon Errata"
depends on !HUGETLB_PAGE
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 5a96a2763e4a..14ef17e10ec9 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -293,17 +293,6 @@ config CPM2
you wish to build a kernel for a machine with a CPM2 coprocessor
on it (826x, 827x, 8560).
-config AXON_RAM
- tristate "Axon DDR2 memory device driver"
- depends on PPC_IBM_CELL_BLADE && BLOCK
- select DAX
- default m
- help
- It registers one block device per Axon's DDR2 memory bank found
- on a system. Block devices are called axonram?, their major and
- minor numbers are available in /proc/devices, /proc/partitions or
- in /sys/block/axonram?/dev.
-
config FSL_ULI1575
bool
default n
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index ae07470fde3c..a429d859f15d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -33,7 +33,6 @@ config PPC_85xx
config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
- select PPC_LIB_RHEAP
select SYS_SUPPORTS_HUGETLBFS
config 40x
@@ -168,13 +167,6 @@ config PPC_FPU
bool
default y if PPC64
-config PPC_8xx_PERF_EVENT
- bool "PPC 8xx perf events"
- depends on PPC_8xx && PERF_EVENTS
- help
- This is Performance Events support for PPC 8xx. The 8xx doesn't
- have a PMU but some events are emulated using 8xx features.
-
config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
depends on E500 || PPC_83xx
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 6fc85e29dc08..5d4bf9aed51a 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -315,8 +315,7 @@ static int __init setup_iic(void)
struct cbe_iic_regs __iomem *node_iic;
const u32 *np;
- for (dn = NULL;
- (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (!of_device_is_compatible(dn,
"IBM,CBEA-Internal-Interrupt-Controller"))
continue;
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index d3543e68efe8..7d31b8d14661 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -192,8 +192,7 @@ static void __init mpic_init_IRQ(void)
struct device_node *dn;
struct mpic *mpic;
- for (dn = NULL;
- (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
continue;
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index aa44bfc46467..c137f0cb4151 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -343,8 +343,7 @@ void __init spider_init_IRQ(void)
* device-tree is bogus anyway) so all we can do is pray or maybe test
* the address and deduce the node-id
*/
- for (dn = NULL;
- (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+ for_each_node_by_name(dn, "interrupt-controller") {
if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
if (of_address_to_resource(dn, 0, &r)) {
printk(KERN_WARNING "spider-pic: Failed\n");
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index f636ee22b203..5c409c98cca8 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -292,12 +292,12 @@ static int __init of_enumerate_spus(int (*fn)(void *data))
unsigned int n = 0;
ret = -ENODEV;
- for (node = of_find_node_by_type(NULL, "spe");
- node; node = of_find_node_by_type(node, "spe")) {
+ for_each_node_by_type(node, "spe") {
ret = fn(node);
if (ret) {
printk(KERN_WARNING "%s: Error initializing %s\n",
__func__, node->name);
+ of_node_put(node);
break;
}
n++;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index fc7772c3d068..c1be486da899 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
- return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
- (unsigned int) p->tstamp.tv_sec,
+ return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+ (unsigned long long) p->tstamp.tv_sec,
(unsigned int) p->tstamp.tv_nsec,
p->spu_id,
(unsigned int) p->type,
@@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
struct switch_log_entry *p;
p = ctx->switch_log->log + ctx->switch_log->head;
- ktime_get_ts(&p->tstamp);
+ ktime_get_ts64(&p->tstamp);
p->timebase = get_tb();
p->spu_id = spu ? spu->number : -1;
p->type = type;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 9558d725a99b..db329d4bf1c3 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -455,7 +455,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
}
}
- ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+ ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
if (ret)
goto out_aff_unlock;
@@ -546,7 +546,7 @@ static int spufs_create_gang(struct inode *inode,
struct path path = {.mnt = mnt, .dentry = dentry};
int ret;
- ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+ ret = spufs_mkgang(inode, dentry, mode & 0777);
if (!ret) {
ret = spufs_gang_open(&path);
if (ret < 0) {
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 2d0479ad3af4..b5fc1b3fe538 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -69,7 +69,7 @@ struct switch_log {
unsigned long head;
unsigned long tail;
struct switch_log_entry {
- struct timespec tstamp;
+ struct timespec64 tstamp;
s32 spu_id;
u32 type;
u32 val;
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index aafa01ba062f..2c72263ad6ab 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -589,7 +589,7 @@ int pasemi_dma_init(void)
pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0);
while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) {
if (time_after(jiffies, timeout)) {
- pr_warning("Warning: Could not disable RX section\n");
+ pr_warn("Warning: Could not disable RX section\n");
break;
}
}
@@ -598,7 +598,7 @@ int pasemi_dma_init(void)
pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0);
while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) {
if (time_after(jiffies, timeout)) {
- pr_warning("Warning: Could not disable TX section\n");
+ pr_warn("Warning: Could not disable TX section\n");
break;
}
}
diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
index a00096b1c713..6b5dcccae1d3 100644
--- a/arch/powerpc/platforms/powermac/backlight.c
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -186,7 +186,7 @@ int pmac_backlight_set_legacy_brightness(int brightness)
return __pmac_backlight_set_legacy_brightness(brightness);
}
-int pmac_backlight_get_legacy_brightness()
+int pmac_backlight_get_legacy_brightness(void)
{
int result = -ENXIO;
@@ -205,12 +205,12 @@ int pmac_backlight_get_legacy_brightness()
return result;
}
-void pmac_backlight_disable()
+void pmac_backlight_disable(void)
{
atomic_inc(&kernel_backlight_disabled);
}
-void pmac_backlight_enable()
+void pmac_backlight_enable(void)
{
atomic_dec(&kernel_backlight_disabled);
}
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 9e3f39d36e88..466b84234683 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2641,7 +2641,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
phys_addr_t addr;
u64 size;
- for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) {
+ for_each_node_by_name(node, name) {
if (!compat)
break;
if (of_device_is_compatible(node, compat))
@@ -2853,7 +2853,6 @@ set_initial_features(void)
}
/* Enable ATA-100 before PCI probe. */
- np = of_find_node_by_name(NULL, "ata-6");
for_each_node_by_name(np, "ata-6") {
if (np->parent
&& of_device_is_compatible(np->parent, "uni-north")
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 5e0719b27294..57bbff465964 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -486,15 +486,16 @@ static int __init pmac_pic_probe_mpic(void)
struct device_node *np, *master = NULL, *slave = NULL;
/* We can have up to 2 MPICs cascaded */
- for (np = NULL; (np = of_find_node_by_type(np, "open-pic"))
- != NULL;) {
+ for_each_node_by_type(np, "open-pic") {
if (master == NULL &&
of_get_property(np, "interrupts", NULL) == NULL)
master = of_node_get(np);
else if (slave == NULL)
slave = of_node_get(np);
- if (master && slave)
+ if (master && slave) {
+ of_node_put(np);
break;
+ }
}
/* Check for bogus setups */
@@ -604,6 +605,7 @@ static int pmacpic_find_viaint(void)
if (np == NULL)
goto not_found;
viaint = irq_of_parse_and_map(np, 0);
+ of_node_put(np);
not_found:
#endif /* CONFIG_ADB_PMU */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 2cd99eb30762..95275e0e2efa 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -774,8 +774,8 @@ static void __init smp_core99_probe(void)
if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
/* Count CPUs in the device-tree */
- for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;)
- ++ncpus;
+ for_each_node_by_type(cpus, "cpu")
+ ++ncpus;
printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 3732118a0482..6c9d5199a7e2 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
obj-$(CONFIG_PPC_FTW) += nx-ftw.o
+obj-$(CONFIG_OCXL_BASE) += ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4650fb294e7a..33c86c1a1720 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -43,6 +43,22 @@
static int eeh_event_irq = -EINVAL;
+void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
static int pnv_eeh_init(void)
{
struct pci_controller *hose;
@@ -86,6 +102,7 @@ static int pnv_eeh_init(void)
}
eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
return 0;
}
@@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
-static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
-{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- u32 devctl, cmd, cap2, aer_capctl;
- int old_mps;
-
- if (edev->pcie_cap) {
- /* Restore MPS */
- old_mps = (ffs(pdn->mps) - 8) << 5;
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
- devctl |= old_mps;
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
-
- /* Disable Completion Timeout */
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
- 4, &cap2);
- if (cap2 & 0x10) {
- eeh_ops->read_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, &cap2);
- cap2 |= 0x10;
- eeh_ops->write_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, cap2);
- }
- }
-
- /* Enable SERR and parity checking */
- eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
- cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
- eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
- /* Enable report various errors */
- if (edev->pcie_cap) {
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_CERE;
- devctl |= (PCI_EXP_DEVCTL_NFERE |
- PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_URRE);
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
- }
-
- /* Enable ECRC generation and check */
- if (edev->pcie_cap && edev->aer_cap) {
- eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, &aer_capctl);
- aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
- eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, aer_capctl);
- }
-
- return 0;
-}
-
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
struct pnv_phb *phb;
- s64 ret;
+ s64 ret = 0;
int config_addr = (pdn->busno << 8) | (pdn->devfn);
if (!edev)
@@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
* to be exported by firmware in extendible way.
*/
if (edev->physfn) {
- ret = pnv_eeh_restore_vf_config(pdn);
+ ret = eeh_restore_vf_config(pdn);
} else {
phb = pdn->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
@@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
return -EIO;
}
- return 0;
+ return ret;
}
static struct eeh_ops pnv_eeh_ops = {
@@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = {
.read_config = pnv_eeh_read_config,
.write_config = pnv_eeh_write_config,
.next_error = pnv_eeh_next_error,
- .restore_config = pnv_eeh_restore_config
+ .restore_config = pnv_eeh_restore_config,
+ .notify_resume = NULL
};
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdev->is_virtfn)
- return;
-
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
- eeh_add_device_early(pdn);
- eeh_add_device_late(pdev);
- eeh_sysfs_add_device(pdev);
-}
-
#ifdef CONFIG_PCI_IOV
static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
{
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index f6cbc1a71472..0a253b64ac5f 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -39,7 +39,10 @@
*/
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
- return PCI_DN(dn)->pcidev;
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
+ pdn->busno, pdn->devfn);
}
/* Given a NPU device get the associated PCI device. */
@@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
int64_t rc = 0;
phys_addr_t top = memblock_end_of_DRAM();
- if (phb->type != PNV_PHB_NPU || !npe->pdev)
+ if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
return -EINVAL;
rc = pnv_npu_unset_window(npe, 0);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644
index 000000000000..fa9b53af3c7b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <asm/xive.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX 64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS 15
+#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1 << 31)
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+
+struct actag_range {
+ u16 start;
+ u16 count;
+};
+
+struct npu_link {
+ struct list_head list;
+ int domain;
+ int bus;
+ int dev;
+ u16 fn_desired_actags[8];
+ struct actag_range fn_actags[8];
+ bool assignment_done;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * about the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and add them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of a same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+ int vsec = pos;
+ u16 vendor, id;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+ if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+ return vsec;
+ }
+ return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+ int vsec = 0;
+ u8 idx;
+
+ while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
+ vsec))) {
+ pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+ &idx);
+ if (idx == afu_idx)
+ return vsec;
+ }
+ return 0;
+}
+
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+ int pos;
+ u32 val;
+
+ pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+ if (val & AFU_PRESENT)
+ *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+ else
+ *afu_idx = -1;
+ return 0;
+}
+
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+ int pos;
+ u16 actag_sup;
+
+ pos = find_dvsec_afu_ctrl(dev, afu_idx);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+ &actag_sup);
+ *actag = actag_sup & ACTAG_MASK;
+ return 0;
+}
+
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+ struct npu_link *link;
+
+ list_for_each_entry(link, &links_list, list) {
+ /* The functions of a device all share the same link */
+ if (link->domain == pci_domain_nr(dev->bus) &&
+ link->bus == dev->bus->number &&
+ link->dev == PCI_SLOT(dev->devfn)) {
+ return link;
+ }
+ }
+
+ /* link doesn't exist yet. Allocate one */
+ link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
+ if (!link)
+ return NULL;
+ link->domain = pci_domain_nr(dev->bus);
+ link->bus = dev->bus->number;
+ link->dev = PCI_SLOT(dev->devfn);
+ list_add(&link->list, &links_list);
+ return link;
+}
+
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct npu_link *link;
+ int rc, afu_idx = -1, i, actag;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ return;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_warn(&dev->dev, "couldn't update actag information\n");
+ mutex_unlock(&links_list_lock);
+ return;
+ }
+
+ /*
+ * Check how many actags are desired for the AFUs under that
+ * function and add it to the count for the link
+ */
+ rc = get_max_afu_index(dev, &afu_idx);
+ if (rc) {
+ /* Most likely an invalid config space */
+ dev_dbg(&dev->dev, "couldn't find AFU information\n");
+ afu_idx = -1;
+ }
+
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
+ for (i = 0; i <= afu_idx; i++) {
+ /*
+ * AFU index 'holes' are allowed. So don't fail if we
+ * can't read the actag info for an index
+ */
+ rc = get_actag_count(dev, i, &actag);
+ if (rc)
+ continue;
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
+ }
+ dev_dbg(&dev->dev, "total actags for function: %d\n",
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
+
+ mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+ u16 count;
+
+ if (total <= PNV_OCXL_ACTAG_MAX)
+ count = desired;
+ else
+ count = PNV_OCXL_ACTAG_MAX * desired / total;
+
+ return count;
+}
+
+static void assign_actags(struct npu_link *link)
+{
+ u16 actag_count, range_start = 0, total_desired = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ total_desired += link->fn_desired_actags[i];
+
+ for (i = 0; i < 8; i++) {
+ if (link->fn_desired_actags[i]) {
+ actag_count = assign_fn_actags(
+ link->fn_desired_actags[i],
+ total_desired);
+ link->fn_actags[i].start = range_start;
+ link->fn_actags[i].count = actag_count;
+ range_start += actag_count;
+ WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
+ }
+ pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+ link->domain, link->bus, link->dev, i,
+ link->fn_actags[i].start, link->fn_actags[i].count,
+ link->fn_desired_actags[i]);
+ }
+ link->assignment_done = true;
+}
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported)
+{
+ struct npu_link *link;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+ /*
+ * On p9, we only have 64 actags per link, so they must be
+ * shared by all the functions of the same adapter. We counted
+ * the desired actag counts during PCI enumeration, so that we
+ * can allocate a pro-rated number of actags to each function.
+ */
+ if (!link->assignment_done)
+ assign_actags(link);
+
+ *base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
+ *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
+ *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
+
+ mutex_unlock(&links_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+ struct npu_link *link;
+ int i, rc = -EINVAL;
+
+ /*
+ * The number of PASIDs (process address space ID) which can
+ * be used by a function depends on how many functions exist
+ * on the device. The NPU needs to be configured to know how
+ * many bits are available to PASIDs and how many are to be
+ * used by the function BDF indentifier.
+ *
+ * We only support one AFU-carrying function for now.
+ */
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+
+ for (i = 0; i < 8; i++)
+ if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
+ *count = PNV_OCXL_PASID_MAX;
+ rc = 0;
+ break;
+ }
+
+ mutex_unlock(&links_list_lock);
+ dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+ rc ? 0 : *count);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+ int shift, idx;
+
+ WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
+ idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
+ shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
+ buf[idx] |= rate << shift;
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size)
+{
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+ /*
+ * The TL capabilities are a characteristic of the NPU, so
+ * we go with hard-coded values.
+ *
+ * The receiving rate of each template is encoded on 4 bits.
+ *
+ * On P9:
+ * - templates 0 -> 3 are supported
+ * - templates 0, 1 and 3 have a 0 receiving rate
+ * - template 2 has receiving rate of 1 (extra cycle)
+ */
+ memset(rate_buf, 0, rate_buf_size);
+ set_templ_rate(2, 1, rate_buf);
+ *cap = PNV_OCXL_TL_P9_RECV_CAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+
+ rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+ rate_buf_phys, rate_buf_size);
+ if (rc) {
+ dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+ int rc;
+
+ rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Can't get translation interrupt for device\n");
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle)
+{
+ iounmap(dsisr);
+ iounmap(dar);
+ iounmap(tfc);
+ iounmap(pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle)
+{
+ u64 reg;
+ int i, j, rc = 0;
+ void __iomem *regs[4];
+
+ /*
+ * opal stores the mmio addresses of the DSISR, DAR, TFC and
+ * PE_HANDLE registers in a device tree property, in that
+ * order
+ */
+ for (i = 0; i < 4; i++) {
+ rc = of_property_read_u64_index(dev->dev.of_node,
+ "ibm,opal-xsl-mmio", i, &reg);
+ if (rc)
+ break;
+ regs[i] = ioremap(reg, 8);
+ if (!regs[i]) {
+ rc = -EINVAL;
+ break;
+ }
+ }
+ if (rc) {
+ dev_err(&dev->dev, "Can't map translation mmio registers\n");
+ for (j = i - 1; j >= 0; j--)
+ iounmap(regs[j]);
+ } else {
+ *dsisr = regs[0];
+ *dar = regs[1];
+ *tfc = regs[2];
+ *pe_handle = regs[3];
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data {
+ u64 phb_opal_id;
+ u32 bdfn;
+};
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+ void **platform_data)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct spa_data *data;
+ u32 bdfn;
+ int rc;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ bdfn = (dev->bus->number << 8) | dev->devfn;
+ rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
+ PE_mask);
+ if (rc) {
+ dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+ kfree(data);
+ return rc;
+ }
+ data->phb_opal_id = phb->opal_id;
+ data->bdfn = bdfn;
+ *platform_data = (void *) data;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+void pnv_ocxl_spa_release(void *platform_data)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
+ WARN_ON(rc);
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
+
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
+{
+ __be64 flags, trigger_page;
+ s64 rc;
+ u32 hwirq;
+
+ hwirq = xive_native_alloc_irq();
+ if (!hwirq)
+ return -ENOENT;
+
+ rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
+ NULL);
+ if (rc || !trigger_page) {
+ xive_native_free_irq(hwirq);
+ return -ENOENT;
+ }
+ *irq = hwirq;
+ *trigger_addr = be64_to_cpu(trigger_page);
+ return 0;
+
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
+
+void pnv_ocxl_free_xive_irq(u32 irq)
+{
+ xive_native_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 4c827826c05e..0dc8fa4e0af2 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
* due to the dynamic size of the dump
*/
static struct dump_attribute id_attribute =
- __ATTR(id, S_IRUGO, dump_id_show, NULL);
+ __ATTR(id, 0444, dump_id_show, NULL);
static struct dump_attribute type_attribute =
- __ATTR(type, S_IRUGO, dump_type_show, NULL);
+ __ATTR(type, 0444, dump_type_show, NULL);
static struct dump_attribute ack_attribute =
__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index ecd6d9177d13..ba6e437abb4b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
}
static struct elog_attribute id_attribute =
- __ATTR(id, S_IRUGO, elog_id_show, NULL);
+ __ATTR(id, 0444, elog_id_show, NULL);
static struct elog_attribute type_attribute =
- __ATTR(type, S_IRUGO, elog_type_show, NULL);
+ __ATTR(type, 0444, elog_type_show, NULL);
static struct elog_attribute ack_attribute =
__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 465ea105b771..dd4c9b8b8a81 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,78 @@
#include <asm/io.h>
#include <asm/imc-pmu.h>
#include <asm/cputhreads.h>
+#include <asm/debugfs.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
+ *val = cpu_to_be64(*(u64 *)data);
+ return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+ *(u64 *)data = cpu_to_be64(val);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file_unsafe(name, mode, parent,
+ value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ * for imc_cmd and imc_mode
+ * for each node in the system.
+ * imc_mode and imc_cmd can be changed by echo into
+ * this interface.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+ struct imc_pmu *pmu_ptr)
+{
+ static u64 loc, *imc_mode_addr, *imc_cmd_addr;
+ int chip = 0, nid;
+ char mode[16], cmd[16];
+ u32 cb_offset;
+
+ imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+
+ /*
+ * Return here, either because 'imc' directory already exists,
+ * Or failed to create a new one.
+ */
+ if (!imc_debugfs_parent)
+ return;
+
+ if (of_property_read_u32(node, "cb_offset", &cb_offset))
+ cb_offset = IMC_CNTL_BLK_OFFSET;
+
+ for_each_node(nid) {
+ loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+ sprintf(mode, "imc_mode_%d", nid);
+ if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+ imc_mode_addr))
+ goto err;
+
+ imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+ sprintf(cmd, "imc_cmd_%d", nid);
+ if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+ imc_cmd_addr))
+ goto err;
+ chip++;
+ }
+ return;
+
+err:
+ debugfs_remove_recursive(imc_debugfs_parent);
+}
/*
* imc_get_mem_addr_nest: Function to get nest counter memory region
@@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
pmu_ptr->imc_counter_mmaped = true;
+ export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
}
+ /* If none of the nest units are registered, remove debugfs interface */
+ if (pmu_count == 0)
+ debugfs_remove_recursive(imc_debugfs_parent);
+
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 23fb6647dced..6fd4092798d5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -260,13 +260,13 @@ void __init opal_sys_param_init(void)
/* If the parameter is read-only or read-write */
switch (perm[i] & 3) {
case OPAL_SYSPARAM_READ:
- attr[i].kobj_attr.attr.mode = S_IRUGO;
+ attr[i].kobj_attr.attr.mode = 0444;
break;
case OPAL_SYSPARAM_WRITE:
- attr[i].kobj_attr.attr.mode = S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0200;
break;
case OPAL_SYSPARAM_RW:
- attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0644;
break;
default:
break;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..1b2936ba6040 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 041ddbd1fc57..c15182765ff5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPAL;
- pr_info("OPAL detected !\n");
+ pr_debug("OPAL detected !\n");
} else {
panic("OPAL != V3 detected, no longer supported.\n");
}
@@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
struct notifier_block *nb)
{
if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
- pr_warning("%s: Invalid arguments, msg_type:%d\n",
- __func__, msg_type);
+ pr_warn("%s: Invalid arguments, msg_type:%d\n",
+ __func__, msg_type);
return -EINVAL;
}
@@ -281,8 +281,8 @@ static void opal_handle_message(void)
/* check for errors. */
if (ret) {
- pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
- __func__, ret);
+ pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+ __func__, ret);
return;
}
@@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs,
void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
- /*
- * This is mostly taken from kernel/panic.c, but tries to do
- * relatively minimal work. Don't use delay functions (TB may
- * be broken), don't crash dump (need to set a firmware log),
- * don't run notifiers. We do want to get some information to
- * Linux console.
- */
- console_verbose();
- bust_spinlocks(1);
+ panic_flush_kmsg_start();
+
pr_emerg("Hardware platform error: %s\n", msg);
if (regs)
show_regs(regs);
smp_send_stop();
- printk_safe_flush_on_panic();
- kmsg_dump(KMSG_DUMP_PANIC);
- bust_spinlocks(0);
- debug_locks_off();
- console_flush_on_panic();
+
+ panic_flush_kmsg_end();
/*
* Don't bother to shut things down because this will
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9582aeb1fe4c..496e47696ed0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -54,7 +54,8 @@
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
+ "NPU_OCAPI" };
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
}
static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
static int __init iommu_setup(char *str)
{
@@ -110,6 +112,14 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
+static int __init pci_reset_phbs_setup(char *str)
+{
+ pci_reset_phbs = true;
+ return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
/*
@@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
@@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available, disabling device\n",
- pci_name(dev));
+ pr_warn("%s: Not enough PE# available, disabling device\n",
+ pci_name(dev));
return NULL;
}
@@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
* At some point we want to remove the PDN completely anyways
*/
pci_dev_get(dev);
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
@@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
continue;
pe->device_count++;
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
@@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+ pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
return NULL;
}
@@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
pci_dev_get(npu_pdev);
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
phb->ioda.pe_rmap[rid] = pe->pe_number;
@@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void)
{
struct pci_controller *hose, *tmp;
struct pnv_phb *phb;
+ struct pci_bus *bus;
+ struct pci_dev *pdev;
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU) {
+ if (phb->type == PNV_PHB_NPU_NVLINK) {
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
if (phb->model == PNV_PHB_MODEL_NPU2)
pnv_npu2_init(phb);
}
+ if (phb->type == PNV_PHB_NPU_OCAPI) {
+ bus = hose->bus;
+ list_for_each_entry(pdev, &bus->devices, bus_list)
+ pnv_ioda_setup_dev_PE(pdev);
+ }
}
}
@@ -1692,7 +1706,7 @@ m64_failed:
return ret;
}
-int pcibios_sriov_disable(struct pci_dev *pdev)
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
{
pnv_pci_sriov_disable(pdev);
@@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
}
-int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
add_dev_pci_data(pdev);
@@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
unsigned long direct_table_size;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
- (window_size > memory_hotplug_max()) ||
!is_power_of_2(window_size))
return 0;
@@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
hose = pci_bus_to_host(pdev->bus);
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
return 0;
*ptmppe = &phb->ioda.pe_array[pdn->pe_number];
@@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
sprintf(name, "PCI%04x", hose->global_number);
phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
if (!phb->dbgfs) {
- pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+ pr_warn("%s: Error on creating debugfs on PHB#%x\n",
__func__, hose->global_number);
continue;
}
@@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
#ifdef CONFIG_CXL_BASE
const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
@@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU) {
+ switch (phb->type) {
+ case PNV_PHB_NPU_NVLINK:
hose->controller_ops = pnv_npu_ioda_controller_ops;
- } else {
+ break;
+ case PNV_PHB_NPU_OCAPI:
+ hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+ break;
+ default:
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
}
@@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+ ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
#endif
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
@@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Reset IODA tables to a clean state */
rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
if (rc)
- pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
+ pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc);
/*
* If we're running in kdump kernel, the previous kernel never
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
- * transactions from previous kernel.
+ * transactions from previous kernel. The ppc_pci_reset_phbs
+ * kernel parameter will force this reset too.
*/
- if (is_kdump_kernel()) {
+ if (is_kdump_kernel() || pci_reset_phbs) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
@@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
void __init pnv_pci_init_npu_phb(struct device_node *np)
{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
+}
+
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
+}
+
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type == PNV_PHB_NPU_OCAPI)
+ dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 5422f4a6317c..69d102cbf48f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void)
for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
pnv_pci_init_npu_phb(np);
+ /* Look for NPU2 OpenCAPI PHBs */
+ for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
+ pnv_pci_init_npu2_opencapi_phb(np);
+
/* Configure IOMMU DMA hooks */
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b772d7473896..eada4b6068cb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -12,9 +12,10 @@ struct pci_dn;
#define NV_NMMU_ATSD_REGS 8
enum pnv_phb_type {
- PNV_PHB_IODA1 = 0,
- PNV_PHB_IODA2 = 1,
- PNV_PHB_NPU = 2,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU_NVLINK = 2,
+ PNV_PHB_NPU_OCAPI = 3,
};
/* Precise PHB model for error management */
@@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ba030669eca1..9664c8461f03 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -37,6 +37,8 @@
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
#include "powernv.h"
@@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
}
+
smp_mb();
+ /*
+ * For kdump kernels, we process the ipi and jump to
+ * crash_ipi_callback
+ */
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKERESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ crash_ipi_callback(&regs);
+ /* Does not return */
+ }
+
if (cpu_core_split_required())
continue;
@@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pnv_smp_cpu_kill_self;
+#ifdef CONFIG_KEXEC_CORE
+ crash_wake_offline = 1;
+#endif
#endif
}
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e48462447ff0..e7075aaff1bb 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -663,8 +663,8 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id)
if (rem)
break;
}
- pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__,
- bus_id, dev_id);
+ pr_warn("%s:%u: device %llu:%llu not found\n",
+ __func__, __LINE__, bus_id, dev_id);
return;
found:
@@ -859,11 +859,9 @@ static int ps3_probe_thread(void *data)
if (notify_event->event_type != notify_region_probe ||
notify_event->bus_id != dev.sbd.bus_id) {
- pr_warning("%s:%u: bad notify_event: event %llu, "
- "dev_id %llu, dev_type %llu\n",
- __func__, __LINE__, notify_event->event_type,
- notify_event->dev_id,
- notify_event->dev_type);
+ pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
+ __func__, __LINE__, notify_event->event_type,
+ notify_event->dev_id, notify_event->dev_type);
continue;
}
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index b0f34663b1ae..7f870ec29daf 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -607,8 +607,8 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
r->ioid,
iopte_flag);
if (result) {
- pr_warning("%s:%d: lv1_put_iopte failed: %s\n",
- __func__, __LINE__, ps3_result(result));
+ pr_warn("%s:%d: lv1_put_iopte failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
goto fail_map;
}
DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__,
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 3db53e8aff92..cdbfc5cfd6f3 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -699,7 +699,7 @@ static void os_area_queue_work_handler(struct work_struct *work)
error = update_flash_db();
if (error)
- pr_warning("%s: Could not update FLASH ROM\n", __func__);
+ pr_warn("%s: Could not update FLASH ROM\n", __func__);
pr_debug(" <- %s:%d\n", __func__, __LINE__);
}
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 6244bc849469..77a37520068d 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -113,6 +113,7 @@ static void ps3_panic(char *str)
printk(" System does not reboot automatically.\n");
printk(" Please press POWER button.\n");
printk("\n");
+ panic_flush_kmsg_end();
while(1)
lv1_pause(1);
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 560aefde06c0..25427a48feae 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -72,20 +72,20 @@ MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);
-module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
"[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
-module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
"before loaning resumes. "
"[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
-module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
"[Default=" __stringify(CMM_OOM_KB) "]");
-module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
"[Default=" __stringify(CMM_MIN_MEM_MB) "]");
-module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
"[Default=" __stringify(CMM_DEBUG) "]");
@@ -385,7 +385,7 @@ static int cmm_thread(void *dummy)
{ \
return sprintf(buf, format, ##args); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
@@ -411,7 +411,7 @@ static ssize_t store_oom_pages(struct device *dev,
return count;
}
-static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(oom_freed_kb, 0644,
show_oom_pages, store_oom_pages);
static struct device_attribute *cmm_attrs[] = {
@@ -765,7 +765,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
}
module_param_call(disable, cmm_set_disable, param_get_uint,
- &cmm_disabled, S_IRUGO | S_IWUSR);
+ &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
"[Default=" __stringify(CMM_DISABLE) "]");
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 6b812ad990e4..823cb27efa8b 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -55,6 +55,43 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
+#ifdef CONFIG_PCI_IOV
+void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pci_dn *physfn_pdn;
+ struct eeh_dev *edev;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ physfn_pdn = pci_get_pdn(pdev->physfn);
+ pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
+ edev = pdn_to_eeh_dev(pdn);
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+ eeh_add_to_parent_pe(edev); /* Add as VF PE type */
+ eeh_sysfs_add_device(pdev);
+
+}
+#endif
+
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
* in BSS, and not dynamically alloced, so that it ends up in
@@ -120,6 +157,11 @@ static int pseries_eeh_init(void)
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+#ifdef CONFIG_PCI_IOV
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+#endif
+
return 0;
}
@@ -684,6 +726,121 @@ static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32
return rtas_write_config(pdn, where, size, val);
}
+static int pseries_eeh_restore_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ s64 ret = 0;
+
+ if (!edev)
+ return -EEXIST;
+
+ /*
+ * FIXME: The MPS, error routing rules, timeout setting are worthy
+ * to be exported by firmware in extendible way.
+ */
+ if (edev->physfn)
+ ret = eeh_restore_vf_config(pdn);
+
+ if (ret) {
+ pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+ __func__, edev->pe_config_addr, ret);
+ return -EIO;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+int pseries_send_allow_unfreeze(struct pci_dn *pdn,
+ u16 *vf_pe_array, int cur_vfs)
+{
+ int rc;
+ int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze");
+ unsigned long buid, addr;
+
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ buid = pdn->phb->buid;
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL,
+ addr,
+ BUID_HI(buid),
+ BUID_LO(buid),
+ rtas_data_buf, cur_vfs * sizeof(u16));
+ spin_unlock(&rtas_data_buf_lock);
+ if (rc)
+ pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n",
+ __func__,
+ pdn->phb->global_number, addr, rc);
+ return rc;
+}
+
+static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
+{
+ struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
+ int cur_vfs = 0, rc = 0, vf_index, bus, devfn;
+ u16 *vf_pe_array;
+
+ vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!vf_pe_array)
+ return -ENOMEM;
+ if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) {
+ if (edev->pdev->is_physfn) {
+ cur_vfs = pci_num_vf(edev->pdev);
+ pdn = eeh_dev_to_pdn(edev);
+ parent = pdn->parent;
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++)
+ vf_pe_array[vf_index] =
+ cpu_to_be16(pdn->pe_num_map[vf_index]);
+ rc = pseries_send_allow_unfreeze(pdn, vf_pe_array,
+ cur_vfs);
+ pdn->last_allow_rc = rc;
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++) {
+ list_for_each_entry_safe(pdn, tmp,
+ &parent->child_list,
+ list) {
+ bus = pci_iov_virtfn_bus(edev->pdev,
+ vf_index);
+ devfn = pci_iov_virtfn_devfn(edev->pdev,
+ vf_index);
+ if (pdn->busno != bus ||
+ pdn->devfn != devfn)
+ continue;
+ pdn->last_allow_rc = rc;
+ }
+ }
+ } else {
+ pdn = pci_get_pdn(edev->pdev);
+ vf_pe_array[0] = cpu_to_be16(pdn->pe_number);
+ physfn_pdn = pci_get_pdn(edev->physfn);
+ rc = pseries_send_allow_unfreeze(physfn_pdn,
+ vf_pe_array, 1);
+ pdn->last_allow_rc = rc;
+ }
+ }
+
+ kfree(vf_pe_array);
+ return rc;
+}
+
+static int pseries_notify_resume(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+ if (!edev)
+ return -EEXIST;
+
+ if (rtas_token("ibm,open-sriov-allow-unfreeze")
+ == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ if (edev->pdev->is_physfn || edev->pdev->is_virtfn)
+ return pseries_call_allow_unfreeze(edev);
+
+ return 0;
+}
+#endif
+
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
.init = pseries_eeh_init,
@@ -699,7 +856,10 @@ static struct eeh_ops pseries_eeh_ops = {
.read_config = pseries_eeh_read_config,
.write_config = pseries_eeh_write_config,
.next_error = NULL,
- .restore_config = NULL
+ .restore_config = pseries_eeh_restore_config,
+#ifdef CONFIG_PCI_IOV
+ .notify_resume = pseries_notify_resume
+#endif
};
/**
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 63cc82ad58ac..a3bbeb43689e 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -114,6 +114,8 @@ static __initdata struct vec5_fw_feature
vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN, OV5_PRRN},
+ {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2},
+ {FW_FEATURE_DRC_INFO, OV5_DRC_INFO},
};
static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a7d14aa7bb7c..dceb51454d8d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
+extern int find_and_online_cpu_nid(int cpu);
+
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
@@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done();
timed_topology_update(1);
+ find_and_online_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
if (rc)
goto out;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 1d48ab424bd9..c1578f54c626 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -23,6 +23,7 @@
#include <asm/prom.h>
#include <asm/sparsemem.h>
#include <asm/fadump.h>
+#include <asm/drmem.h>
#include "pseries.h"
static bool rtas_hp_event;
@@ -100,100 +101,6 @@ static struct property *dlpar_clone_property(struct property *prop,
return new_prop;
}
-static struct property *dlpar_clone_drconf_property(struct device_node *dn)
-{
- struct property *prop, *new_prop;
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
- if (!prop)
- return NULL;
-
- new_prop = dlpar_clone_property(prop, prop->length);
- if (!new_prop)
- return NULL;
-
- /* Convert the property to cpu endian-ness */
- p = new_prop->value;
- *p = be32_to_cpu(*p);
-
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
- lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
- lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
- lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
- }
-
- return new_prop;
-}
-
-static void dlpar_update_drconf_property(struct device_node *dn,
- struct property *prop)
-{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- /* Convert the property back to BE */
- p = prop->value;
- num_lmbs = *p;
- *p = cpu_to_be32(*p);
- p++;
-
- lmbs = (struct of_drconf_cell *)p;
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
- lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
- lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
- lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
- }
-
- rtas_hp_event = true;
- of_update_property(dn, prop);
- rtas_hp_event = false;
-}
-
-static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
-{
- struct device_node *dn;
- struct property *prop;
- struct of_drconf_cell *lmbs;
- u32 *p, num_lmbs;
- int i;
-
- dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!dn)
- return -ENODEV;
-
- prop = dlpar_clone_drconf_property(dn);
- if (!prop) {
- of_node_put(dn);
- return -ENODEV;
- }
-
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == lmb->drc_index) {
- lmbs[i].flags = lmb->flags;
- lmbs[i].aa_index = lmb->aa_index;
-
- dlpar_update_drconf_property(dn, prop);
- break;
- }
- }
-
- of_node_put(dn);
- return 0;
-}
-
static u32 find_aa_index(struct device_node *dr_node,
struct property *ala_prop, const u32 *lmb_assoc)
{
@@ -256,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
return aa_index;
}
-static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
{
struct device_node *parent, *lmb_node, *dr_node;
struct property *ala_prop;
@@ -299,9 +206,9 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
return aa_index;
}
-static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
{
- int aa_index;
+ int rc, aa_index;
lmb->flags |= DRCONF_MEM_ASSIGNED;
@@ -313,17 +220,29 @@ static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
}
lmb->aa_index = aa_index;
- return dlpar_update_device_tree_lmb(lmb);
+
+ rtas_hp_event = true;
+ rc = drmem_update_dt();
+ rtas_hp_event = false;
+
+ return rc;
}
-static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
{
+ int rc;
+
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
lmb->aa_index = 0xffffffff;
- return dlpar_update_device_tree_lmb(lmb);
+
+ rtas_hp_event = true;
+ rc = drmem_update_dt();
+ rtas_hp_event = false;
+
+ return rc;
}
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
{
unsigned long section_nr;
struct mem_section *mem_sect;
@@ -336,7 +255,36 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
return mem_block;
}
-static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+static int get_lmb_range(u32 drc_index, int n_lmbs,
+ struct drmem_lmb **start_lmb,
+ struct drmem_lmb **end_lmb)
+{
+ struct drmem_lmb *lmb, *start, *end;
+ struct drmem_lmb *last_lmb;
+
+ start = NULL;
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
+ start = lmb;
+ break;
+ }
+ }
+
+ if (!start)
+ return -EINVAL;
+
+ end = &start[n_lmbs - 1];
+
+ last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+ if (end > last_lmb)
+ return -EINVAL;
+
+ *start_lmb = start;
+ *end_lmb = end;
+ return 0;
+}
+
+static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
{
struct memory_block *mem_block;
int rc;
@@ -357,13 +305,13 @@ static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
return rc;
}
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+static int dlpar_online_lmb(struct drmem_lmb *lmb)
{
return dlpar_change_lmb_state(lmb, true);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+static int dlpar_offline_lmb(struct drmem_lmb *lmb)
{
return dlpar_change_lmb_state(lmb, false);
}
@@ -426,7 +374,7 @@ static int pseries_remove_mem_node(struct device_node *np)
return 0;
}
-static bool lmb_is_removable(struct of_drconf_cell *lmb)
+static bool lmb_is_removable(struct drmem_lmb *lmb)
{
int i, scns_per_block;
int rc = 1;
@@ -458,9 +406,9 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
return rc ? true : false;
}
-static int dlpar_add_lmb(struct of_drconf_cell *);
+static int dlpar_add_lmb(struct drmem_lmb *);
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
int nid, rc;
@@ -484,28 +432,25 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
return 0;
}
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
- struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
- struct of_drconf_cell *lmbs;
+ struct drmem_lmb *lmb;
int lmbs_removed = 0;
int lmbs_available = 0;
- u32 num_lmbs, *p;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
if (lmbs_to_remove == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = 0; i < num_lmbs; i++) {
- if (lmb_is_removable(&lmbs[i]))
+ for_each_drmem_lmb(lmb) {
+ if (lmb_is_removable(lmb))
lmbs_available++;
+
+ if (lmbs_available == lmbs_to_remove)
+ break;
}
if (lmbs_available < lmbs_to_remove) {
@@ -514,45 +459,47 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
return -EINVAL;
}
- for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) {
- rc = dlpar_remove_lmb(&lmbs[i]);
+ for_each_drmem_lmb(lmb) {
+ rc = dlpar_remove_lmb(lmb);
if (rc)
continue;
- lmbs_removed++;
-
/* Mark this lmb so we can add it later if all of the
* requested LMBs cannot be removed.
*/
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
+
+ lmbs_removed++;
+ if (lmbs_removed == lmbs_to_remove)
+ break;
}
if (lmbs_removed != lmbs_to_remove) {
pr_err("Memory hot-remove failed, adding LMB's back\n");
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB back, drc index %x\n",
- lmbs[i].drc_index);
+ lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
pr_info("Memory at %llx was hot-removed\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = 0;
}
@@ -560,26 +507,21 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
return rc;
}
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmb_found;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (!rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
break;
}
@@ -590,35 +532,30 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
if (rc)
pr_info("Failed to hot-remove memory at %llx\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
else
- pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr);
+ pr_info("Memory at %llx was hot-removed\n", lmb->base_addr);
return rc;
}
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmb_found;
- int i, rc;
+ int rc;
pr_info("Attempting to update LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (!rc) {
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
}
break;
}
@@ -629,20 +566,18 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
if (rc)
pr_info("Failed to update memory at %llx\n",
- lmbs[i].base_addr);
+ lmb->base_addr);
else
- pr_info("Memory at %llx was updated\n", lmbs[i].base_addr);
+ pr_info("Memory at %llx was updated\n", lmb->base_addr);
return rc;
}
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, rc, start_lmb_found;
- int lmbs_available = 0, start_index = 0, end_index;
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int lmbs_available = 0;
+ int rc;
pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
lmbs_to_remove, drc_index);
@@ -650,29 +585,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
if (lmbs_to_remove == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
- start_lmb_found = 0;
-
- /* Navigate to drc_index */
- while (start_index < num_lmbs) {
- if (lmbs[start_index].drc_index == drc_index) {
- start_lmb_found = 1;
- break;
- }
-
- start_index++;
- }
-
- if (!start_lmb_found)
+ rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb);
+ if (rc)
return -EINVAL;
- end_index = start_index + lmbs_to_remove;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
break;
lmbs_available++;
@@ -681,42 +600,43 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
if (lmbs_available < lmbs_to_remove)
return -EINVAL;
- for (i = start_index; i < end_index; i++) {
- if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
break;
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
}
if (rc) {
pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n");
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
pr_info("Memory at %llx (drc index %x) was hot-removed\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
+ lmb->base_addr, lmb->drc_index);
- lmbs[i].reserved = 0;
+ drmem_remove_lmb_reservation(lmb);
}
}
@@ -737,32 +657,30 @@ static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
{
return -EOPNOTSUPP;
}
-static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
- struct property *prop)
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_remove_by_index(u32 drc_index)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_readd_by_index(u32 drc_index)
{
return -EOPNOTSUPP;
}
-static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
int nid, rc;
@@ -801,77 +719,79 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
return rc;
}
-static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
+static int dlpar_memory_add_by_count(u32 lmbs_to_add)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
+ struct drmem_lmb *lmb;
int lmbs_available = 0;
int lmbs_added = 0;
- int i, rc;
+ int rc;
pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
if (lmbs_to_add == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
/* Validate that there are enough LMBs to satisfy the request */
- for (i = 0; i < num_lmbs; i++) {
- if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+ for_each_drmem_lmb(lmb) {
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
lmbs_available++;
+
+ if (lmbs_available == lmbs_to_add)
+ break;
}
if (lmbs_available < lmbs_to_add)
return -EINVAL;
- for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
- if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (rc)
continue;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc) {
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
continue;
}
- lmbs_added++;
-
/* Mark this lmb so we can remove it later if all of the
* requested LMBs cannot be added.
*/
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
+
+ lmbs_added++;
+ if (lmbs_added == lmbs_to_add)
+ break;
}
if (lmbs_added != lmbs_to_add) {
pr_err("Memory hot-add failed, removing any added LMBs\n");
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
pr_err("Failed to remove LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
else
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = 0; i < num_lmbs; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
- lmbs[i].reserved = 0;
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
}
rc = 0;
}
@@ -879,28 +799,22 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
return rc;
}
-static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
+static int dlpar_memory_add_by_index(u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, lmb_found;
- int rc;
+ struct drmem_lmb *lmb;
+ int rc, lmb_found;
pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
-
lmb_found = 0;
- for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].drc_index == drc_index) {
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index == drc_index) {
lmb_found = 1;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (!rc) {
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc)
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
}
break;
@@ -914,18 +828,16 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
else
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, drc_index);
+ lmb->base_addr, drc_index);
return rc;
}
-static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
- struct property *prop)
+static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i, rc, start_lmb_found;
- int lmbs_available = 0, start_index = 0, end_index;
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int lmbs_available = 0;
+ int rc;
pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
lmbs_to_add, drc_index);
@@ -933,29 +845,13 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
if (lmbs_to_add == 0)
return -EINVAL;
- p = prop->value;
- num_lmbs = *p++;
- lmbs = (struct of_drconf_cell *)p;
- start_lmb_found = 0;
-
- /* Navigate to drc_index */
- while (start_index < num_lmbs) {
- if (lmbs[start_index].drc_index == drc_index) {
- start_lmb_found = 1;
- break;
- }
-
- start_index++;
- }
-
- if (!start_lmb_found)
+ rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb);
+ if (rc)
return -EINVAL;
- end_index = start_index + lmbs_to_add;
-
/* Validate that the LMBs in this range are not reserved */
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_RESERVED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
break;
lmbs_available++;
@@ -964,46 +860,48 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
if (lmbs_available < lmbs_to_add)
return -EINVAL;
- for (i = start_index; i < end_index; i++) {
- if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
- rc = dlpar_acquire_drc(lmbs[i].drc_index);
+ rc = dlpar_acquire_drc(lmb->drc_index);
if (rc)
break;
- rc = dlpar_add_lmb(&lmbs[i]);
+ rc = dlpar_add_lmb(lmb);
if (rc) {
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
break;
}
- lmbs[i].reserved = 1;
+ drmem_mark_lmb_reserved(lmb);
}
if (rc) {
pr_err("Memory indexed-count-add failed, removing any added LMBs\n");
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
- rc = dlpar_remove_lmb(&lmbs[i]);
+ rc = dlpar_remove_lmb(lmb);
if (rc)
pr_err("Failed to remove LMB, drc index %x\n",
- be32_to_cpu(lmbs[i].drc_index));
+ lmb->drc_index);
else
- dlpar_release_drc(lmbs[i].drc_index);
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
}
rc = -EINVAL;
} else {
- for (i = start_index; i < end_index; i++) {
- if (!lmbs[i].reserved)
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
continue;
pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmbs[i].base_addr, lmbs[i].drc_index);
- lmbs[i].reserved = 0;
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
}
}
@@ -1012,37 +910,23 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index,
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
- struct device_node *dn;
- struct property *prop;
u32 count, drc_index;
int rc;
lock_device_hotplug();
- dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!dn) {
- rc = -EINVAL;
- goto dlpar_memory_out;
- }
-
- prop = dlpar_clone_drconf_property(dn);
- if (!prop) {
- rc = -EINVAL;
- goto dlpar_memory_out;
- }
-
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
count = hp_elog->_drc_u.drc_count;
- rc = dlpar_memory_add_by_count(count, prop);
+ rc = dlpar_memory_add_by_count(count);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_add_by_index(drc_index, prop);
+ rc = dlpar_memory_add_by_index(drc_index);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
- rc = dlpar_memory_add_by_ic(count, drc_index, prop);
+ rc = dlpar_memory_add_by_ic(count, drc_index);
} else {
rc = -EINVAL;
}
@@ -1051,14 +935,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_ACTION_REMOVE:
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
count = hp_elog->_drc_u.drc_count;
- rc = dlpar_memory_remove_by_count(count, prop);
+ rc = dlpar_memory_remove_by_count(count);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_remove_by_index(drc_index, prop);
+ rc = dlpar_memory_remove_by_index(drc_index);
} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
- rc = dlpar_memory_remove_by_ic(count, drc_index, prop);
+ rc = dlpar_memory_remove_by_ic(count, drc_index);
} else {
rc = -EINVAL;
}
@@ -1066,7 +950,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
case PSERIES_HP_ELOG_ACTION_READD:
drc_index = hp_elog->_drc_u.drc_index;
- rc = dlpar_memory_readd_by_index(drc_index, prop);
+ rc = dlpar_memory_readd_by_index(drc_index);
break;
default:
pr_err("Invalid action (%d) specified\n", hp_elog->action);
@@ -1074,10 +958,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
}
- dlpar_free_property(prop);
-
-dlpar_memory_out:
- of_node_put(dn);
unlock_device_hotplug();
return rc;
}
@@ -1116,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np)
static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
{
- struct of_drconf_cell *new_drmem, *old_drmem;
+ struct of_drconf_cell_v1 *new_drmem, *old_drmem;
unsigned long memblock_size;
u32 entries;
__be32 *p;
@@ -1139,11 +1019,11 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
* of_drconf_cell's.
*/
entries = be32_to_cpu(*p++);
- old_drmem = (struct of_drconf_cell *)p;
+ old_drmem = (struct of_drconf_cell_v1 *)p;
p = (__be32 *)pr->prop->value;
p++;
- new_drmem = (struct of_drconf_cell *)p;
+ new_drmem = (struct of_drconf_cell_v1 *)p;
for (i = 0; i < entries; i++) {
if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 957ae347b0b3..89b7ce807e70 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -163,7 +163,7 @@ static int __init hcall_inst_init(void)
for_each_possible_cpu(cpu) {
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
- hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+ hcall_file = debugfs_create_file(cpu_name_buf, 0444,
hcall_root,
per_cpu(hcall_stats, cpu),
&hcall_inst_seq_fops);
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 408a86044133..c7c1140c13b6 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -298,7 +298,7 @@ out:
return rc;
return count;
}
-static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
+static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe);
static ssize_t ibmebus_store_remove(struct bus_type *bus,
const char *buf, size_t count)
@@ -325,7 +325,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
return -ENODEV;
}
}
-static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
+static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove);
static struct attribute *ibmbus_bus_attrs[] = {
&bus_attr_probe.attr,
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index eaa11334fc8c..06f02960b439 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -816,15 +816,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
ret = tce_clearrange_multi_pSeriesLP(0,
1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
if (ret)
- pr_warning("%pOF failed to clear tces in window.\n",
- np);
+ pr_warn("%pOF failed to clear tces in window.\n",
+ np);
else
pr_debug("%pOF successfully cleared tces in window.\n",
np);
ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
if (ret)
- pr_warning("%pOF: failed to remove direct window: rtas returned "
+ pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
np, ret, ddw_avail[2], liobn);
else
@@ -836,7 +836,7 @@ delprop:
if (remove_prop)
ret = of_remove_property(np, win64);
if (ret)
- pr_warning("%pOF: failed to remove direct window property: %d\n",
+ pr_warn("%pOF: failed to remove direct window property: %d\n",
np, ret);
}
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index b2706c483067..c508c938dc71 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -370,10 +370,10 @@ static void parse_system_parameter_string(struct seq_file *m)
*/
static int lparcfg_count_active_processors(void)
{
- struct device_node *cpus_dn = NULL;
+ struct device_node *cpus_dn;
int count = 0;
- while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+ for_each_node_by_type(cpus_dn, "cpu") {
#ifdef LPARCFG_DEBUG
printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
#endif
@@ -697,11 +697,11 @@ static const struct file_operations lparcfg_fops = {
static int __init lparcfg_init(void)
{
- umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+ umode_t mode = 0444;
/* Allow writing if we have FW_FEATURE_SPLPAR */
if (firmware_has_feature(FW_FEATURE_SPLPAR))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f7042ad492ba..0f7fb7170b03 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -384,7 +384,7 @@ static ssize_t migration_store(struct class *class,
#define MIGRATION_API_VERSION 1
static CLASS_ATTR_WO(migration);
-static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
+static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
static int __init mobility_sysfs_init(void)
{
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 7e75101fa522..6df192f38f80 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -3,6 +3,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <asm/prom.h>
#include "of_helpers.h"
@@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path)
kfree(parent_path);
return parent ? parent : ERR_PTR(-EINVAL);
}
+
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+ struct of_drc_info *data)
+{
+ const char *p;
+ const __be32 *p2;
+
+ if (!data)
+ return -EINVAL;
+
+ /* Get drc-type:encode-string */
+ p = data->drc_type = (char*) (*curval);
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-name-prefix:encode-string */
+ data->drc_name_prefix = (char *)p;
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-index-start:encode-int */
+ p2 = (const __be32 *)p;
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-name-suffix-start:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get number-sequential-elements:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get sequential-increment:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-power-domain:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+ if (!p2)
+ return -EINVAL;
+
+ /* Should now know end of current entry */
+ (*curval) = (void *)p2;
+ data->last_drc_index = data->drc_index_start +
+ ((data->num_sequential_elems - 1) * data->sequential_inc);
+
+ return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 09eba5a9929a..eab96637d6cf 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -3,17 +3,17 @@
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
*
* pSeries specific routines for PCI.
- *
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -54,10 +54,174 @@ void pcibios_name_device(struct pci_dev *dev)
}
}
}
-}
+}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
#endif
+#ifdef CONFIG_PCI_IOV
+#define MAX_VFS_FOR_MAP_PE 256
+struct pe_map_bar_entry {
+ __be64 bar; /* Input: Virtual Function BAR */
+ __be16 rid; /* Input: Virtual Function Router ID */
+ __be16 pe_num; /* Output: Virtual Function PE Number */
+ __be32 reserved; /* Reserved Space */
+};
+
+int pseries_send_map_pe(struct pci_dev *pdev,
+ u16 num_vfs,
+ struct pe_map_bar_entry *vf_pe_array)
+{
+ struct pci_dn *pdn;
+ int rc;
+ unsigned long buid, addr;
+ int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number");
+
+ if (ibm_map_pes == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ pdn = pci_get_pdn(pdev);
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ buid = pdn->phb->buid;
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, vf_pe_array,
+ RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid),
+ rtas_data_buf,
+ num_vfs * sizeof(struct pe_map_bar_entry));
+ memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (rc)
+ dev_err(&pdev->dev,
+ "%s: Failed to associate pes PE#%lx, rc=%x\n",
+ __func__, addr, rc);
+
+ return rc;
+}
+
+void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num)
+{
+ struct pci_dn *pdn;
+
+ pdn = pci_get_pdn(pdev);
+ pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num);
+ dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n",
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+ PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)),
+ pdn->pe_num_map[vf_index]);
+}
+
+int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_dn *pdn;
+ int i, rc, vf_index;
+ struct pe_map_bar_entry *vf_pe_array;
+ struct resource *res;
+ u64 size;
+
+ vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!vf_pe_array)
+ return -ENOMEM;
+
+ pdn = pci_get_pdn(pdev);
+ /* create firmware structure to associate pes */
+ for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+ pdn->pe_num_map[vf_index] = IODA_INVALID_PE;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ if (!res->parent)
+ continue;
+ size = pcibios_iov_resource_alignment(pdev, i +
+ PCI_IOV_RESOURCES);
+ vf_pe_array[vf_index].bar =
+ cpu_to_be64(res->start + size * vf_index);
+ vf_pe_array[vf_index].rid =
+ cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index)
+ << 8) | pci_iov_virtfn_devfn(pdev,
+ vf_index));
+ vf_pe_array[vf_index].pe_num =
+ cpu_to_be16(IODA_INVALID_PE);
+ }
+ }
+
+ rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array);
+ /* Only zero is success */
+ if (!rc)
+ for (vf_index = 0; vf_index < num_vfs; vf_index++)
+ pseries_set_pe_num(pdev, vf_index,
+ vf_pe_array[vf_index].pe_num);
+
+ kfree(vf_pe_array);
+ return rc;
+}
+
+int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_dn *pdn;
+ int rc;
+ const int *max_vfs;
+ int max_config_vfs;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL);
+
+ if (!max_vfs)
+ return -EINVAL;
+
+ /* First integer stores max config */
+ max_config_vfs = of_read_number(&max_vfs[0], 1);
+ if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) {
+ dev_err(&pdev->dev,
+ "Num VFs %x > %x Configurable VFs\n",
+ num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ?
+ MAX_VFS_FOR_MAP_PE : max_config_vfs);
+ return -EINVAL;
+ }
+
+ pdn = pci_get_pdn(pdev);
+ pdn->pe_num_map = kmalloc_array(num_vfs,
+ sizeof(*pdn->pe_num_map),
+ GFP_KERNEL);
+ if (!pdn->pe_num_map)
+ return -ENOMEM;
+
+ rc = pseries_associate_pes(pdev, num_vfs);
+
+ /* Anything other than zero is failure */
+ if (rc) {
+ dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc);
+ kfree(pdn->pe_num_map);
+ } else {
+ pci_vf_drivers_autoprobe(pdev, false);
+ }
+
+ return rc;
+}
+
+int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ /* Allocate PCI data */
+ add_dev_pci_data(pdev);
+ return pseries_pci_sriov_enable(pdev, num_vfs);
+}
+
+int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn;
+
+ pdn = pci_get_pdn(pdev);
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
+ /* Release PCI data */
+ remove_dev_pci_data(pdev);
+ pci_vf_drivers_autoprobe(pdev, true);
+ return 0;
+}
+#endif
+
static void __init pSeries_request_regions(void)
{
if (!isa_io_base)
@@ -76,6 +240,11 @@ void __init pSeries_final_fixup(void)
pSeries_request_regions();
eeh_addr_cache_build();
+
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
+#endif
}
/*
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 35c891aabef0..6ed22127391b 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/firmware.h>
+#include <asm/prom.h>
#define MODULE_VERS "1.0"
@@ -38,26 +39,58 @@ static int sysfs_entries;
static u32 cpu_to_drc_index(int cpu)
{
struct device_node *dn = NULL;
- const int *indexes;
- int i;
+ int thread_index;
int rc = 1;
u32 ret = 0;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
+
/* Convert logical cpu number to core number */
- i = cpu_core_index_of_thread(cpu);
- /*
- * The first element indexes[0] is the number of drc_indexes
- * returned in the list. Hence i+1 will get the drc_index
- * corresponding to core number i.
- */
- WARN_ON(i > indexes[0]);
- ret = indexes[i + 1];
+ thread_index = cpu_core_index_of_thread(cpu);
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (thread_index < drc.last_drc_index)
+ break;
+ }
+
+ ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+ } else {
+ const __be32 *indexes;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+
+ /*
+ * The first element indexes[0] is the number of drc_indexes
+ * returned in the list. Hence thread_index+1 will get the
+ * drc_index corresponding to core number thread_index.
+ */
+ ret = indexes[thread_index + 1];
+ }
+
rc = 0;
err_of_node_put:
@@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index)
{
struct device_node *dn = NULL;
const int *indexes;
- int i, cpu = 0;
+ int thread_index = 0, cpu = 0;
int rc = 1;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
- /*
- * First element in the array is the number of drc_indexes
- * returned. Search through the list to find the matching
- * drc_index and get the core number
- */
- for (i = 0; i < indexes[0]; i++) {
- if (indexes[i + 1] == drc_index)
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (drc_index > drc.last_drc_index) {
+ cpu += drc.num_sequential_elems;
+ continue;
+ }
+ cpu += ((drc_index - drc.drc_index_start) /
+ drc.sequential_inc);
+
+ thread_index = cpu_first_thread_of_core(cpu);
+ rc = 0;
break;
+ }
+ } else {
+ unsigned long int i;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /*
+ * First element in the array is the number of drc_indexes
+ * returned. Search through the list to find the matching
+ * drc_index and get the core number
+ */
+ for (i = 0; i < indexes[0]; i++) {
+ if (indexes[i + 1] == drc_index)
+ break;
+ }
+ /* Convert core number to logical cpu number */
+ thread_index = cpu_first_thread_of_core(i);
+ rc = 0;
}
- /* Convert core number to logical cpu number */
- cpu = cpu_first_thread_of_core(i);
- rc = 0;
err_of_node_put:
of_node_put(dn);
err:
if (rc)
printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
- return cpu;
+ return thread_index;
}
/*
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index f24d8159c9e1..0e0208117e77 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -405,7 +405,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
+ ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops);
if (ent)
proc_set_size(ent, 0);
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index c47585a78b69..054ce7a16fc3 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -179,7 +179,7 @@ static int __init scanlog_init(void)
if (!scanlog_buffer)
goto err;
- ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL,
+ ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
&scanlog_fops);
if (!ent)
goto err;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ae4f596273b5..372d7ada1a0c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -371,8 +371,8 @@ void pseries_disable_reloc_on_exc(void)
mdelay(get_longbusy_msecs(rc));
}
if (rc != H_SUCCESS)
- pr_warning("Warning: Failed to disable relocation on "
- "exceptions: %ld\n", rc);
+ pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
+ rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
@@ -492,6 +492,162 @@ static void pseries_setup_rfi_flush(void)
setup_rfi_flush(types, enable);
}
+#ifdef CONFIG_PCI_IOV
+enum rtas_iov_fw_value_map {
+ NUM_RES_PROPERTY = 0, /* Number of Resources */
+ LOW_INT = 1, /* Lowest 32 bits of Address */
+ START_OF_ENTRIES = 2, /* Always start of entry */
+ APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
+ WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
+ NEXT_ENTRY = 7 /* Go to next entry on array */
+};
+
+enum get_iov_fw_value_index {
+ BAR_ADDRS = 1, /* Get Bar Address */
+ APERTURE_SIZE = 2, /* Get Aperture Size */
+ WDW_SIZE = 3 /* Get Window Size */
+};
+
+resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
+ enum get_iov_fw_value_index value)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(dev);
+ int i, num_res, ret = 0;
+
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return 0;
+
+ /*
+ * First element in the array is the number of Bars
+ * returned. Search through the list to find the matching
+ * bar
+ */
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ if (resno >= num_res)
+ return 0; /* or an errror */
+
+ i = START_OF_ENTRIES + NEXT_ENTRY * resno;
+ switch (value) {
+ case BAR_ADDRS:
+ ret = of_read_number(&indexes[i], 2);
+ break;
+ case APERTURE_SIZE:
+ ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+ break;
+ case WDW_SIZE:
+ ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+ break;
+ }
+
+ return ret;
+}
+
+void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
+{
+ struct resource *res;
+ resource_size_t base, size;
+ int i, r, num_res;
+
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
+ for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+ i += NEXT_ENTRY, r++) {
+ res = &dev->resource[r + PCI_IOV_RESOURCES];
+ base = of_read_number(&indexes[i], 2);
+ size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+ res->flags = pci_parse_of_flags(of_read_number
+ (&indexes[i + LOW_INT], 1), 0);
+ res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
+ res->name = pci_name(dev);
+ res->start = base;
+ res->end = base + size - 1;
+ }
+}
+
+void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
+{
+ struct resource *res, *root, *conflict;
+ resource_size_t base, size;
+ int i, r, num_res;
+
+ /*
+ * First element in the array is the number of Bars
+ * returned. Search through the list to find the matching
+ * bars assign them from firmware into resources structure.
+ */
+ num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+ for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+ i += NEXT_ENTRY, r++) {
+ res = &dev->resource[r + PCI_IOV_RESOURCES];
+ base = of_read_number(&indexes[i], 2);
+ size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+ res->name = pci_name(dev);
+ res->start = base;
+ res->end = base + size - 1;
+ root = &iomem_resource;
+ dev_dbg(&dev->dev,
+ "pSeries IOV BAR %d: trying firmware assignment %pR\n",
+ r + PCI_IOV_RESOURCES, res);
+ conflict = request_resource_conflict(root, res);
+ if (conflict) {
+ dev_info(&dev->dev,
+ "BAR %d: %pR conflicts with %s %pR\n",
+ r + PCI_IOV_RESOURCES, res,
+ conflict->name, conflict);
+ res->flags |= IORESOURCE_UNSET;
+ }
+ }
+}
+
+static void pseries_pci_fixup_resources(struct pci_dev *pdev)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ /*Firmware must support open sriov otherwise dont configure*/
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return;
+ /* Assign the addresses from device tree*/
+ of_pci_set_vf_bar_size(pdev, indexes);
+}
+
+static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
+{
+ const int *indexes;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ if (!pdev->is_physfn || pdev->is_added)
+ return;
+ /*Firmware must support open sriov otherwise dont configure*/
+ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+ if (!indexes)
+ return;
+ /* Assign the addresses from device tree*/
+ of_pci_parse_iov_addrs(pdev, indexes);
+}
+
+static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
+ int resno)
+{
+ const __be32 *reg;
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+
+ /*Firmware must support open sriov otherwise report regular alignment*/
+ reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
+ if (!reg)
+ return pci_iov_resource_size(pdev, resno);
+
+ if (!pdev->is_physfn)
+ return 0;
+ return pseries_get_iov_fw_value(pdev,
+ resno - PCI_IOV_RESOURCES,
+ APERTURE_SIZE);
+}
+#endif
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -525,6 +681,14 @@ static void __init pSeries_setup_arch(void)
vpa_init(boot_cpuid);
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_fixup_resources =
+ pseries_pci_fixup_resources;
+ ppc_md.pcibios_fixup_sriov =
+ pseries_pci_fixup_iov_resources;
+ ppc_md.pcibios_iov_resource_alignment =
+ pseries_pci_iov_resource_alignment;
+#endif
} else {
/* No special idle routine */
ppc_md.enable_pmcs = power4_enable_pmcs;
@@ -533,6 +697,12 @@ static void __init pSeries_setup_arch(void)
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
}
+static void pseries_panic(char *str)
+{
+ panic_flush_kmsg_end();
+ rtas_os_term(str);
+}
+
static int __init pSeries_init_panel(void)
{
/* Manually leave the kernel version on the panel. */
@@ -761,7 +931,7 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
- .panic = rtas_os_term,
+ .panic = pseries_panic,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 89726f07d249..52a021e1f86b 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -214,8 +214,7 @@ static ssize_t show_hibernate(struct device *dev,
return sprintf(buf, "%d\n", KERN_DT_UPDATE);
}
-static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
- show_hibernate, store_hibernate);
+static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate);
static struct bus_type suspend_subsys = {
.name = "power",
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 0baba21404dc..9861407d644a 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -32,7 +32,6 @@ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
mv64x60_udbg.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
-obj-$(CONFIG_AXON_RAM) += axonram.o
obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o
obj-$(CONFIG_PPC_I8259) += i8259.o
@@ -43,7 +42,8 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o
obj-$(CONFIG_CPM) += cpm_common.o
obj-$(CONFIG_CPM1) += cpm1.o
-obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o
+obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o
+obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o
obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o
obj-$(CONFIG_PPC_DCR) += dcr.o
obj-$(CONFIG_UCODE_PATCH) += micropatch.o
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
deleted file mode 100644
index 1b307c80b401..000000000000
--- a/arch/powerpc/sysdev/axonram.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
- *
- * Author: Maxim Shchetynin <maxim@de.ibm.com>
- *
- * Axon DDR2 device driver.
- * It registers one block device per Axon's DDR2 memory bank found on a system.
- * Block devices are called axonram?, their major and minor numbers are
- * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/dax.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/irq.h>
-#include <linux/irqreturn.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/pfn_t.h>
-#include <linux/uio.h>
-
-#include <asm/page.h>
-#include <asm/prom.h>
-
-#define AXON_RAM_MODULE_NAME "axonram"
-#define AXON_RAM_DEVICE_NAME "axonram"
-#define AXON_RAM_MINORS_PER_DISK 16
-#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT
-#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT
-#define AXON_RAM_SECTOR_SHIFT 9
-#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT
-#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING
-
-static int azfs_major, azfs_minor;
-
-struct axon_ram_bank {
- struct platform_device *device;
- struct gendisk *disk;
- struct dax_device *dax_dev;
- unsigned int irq_id;
- unsigned long ph_addr;
- unsigned long io_addr;
- unsigned long size;
- unsigned long ecc_counter;
-};
-
-static ssize_t
-axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct platform_device *device = to_platform_device(dev);
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank);
-
- return sprintf(buf, "%ld\n", bank->ecc_counter);
-}
-
-static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
-
-/**
- * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
- * @irq: interrupt ID
- * @dev: pointer to of_device
- */
-static irqreturn_t
-axon_ram_irq_handler(int irq, void *dev)
-{
- struct platform_device *device = dev;
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank);
-
- dev_err(&device->dev, "Correctable memory error occurred\n");
- bank->ecc_counter++;
- return IRQ_HANDLED;
-}
-
-/**
- * axon_ram_make_request - make_request() method for block device
- * @queue, @bio: see blk_queue_make_request()
- */
-static blk_qc_t
-axon_ram_make_request(struct request_queue *queue, struct bio *bio)
-{
- struct axon_ram_bank *bank = bio->bi_disk->private_data;
- unsigned long phys_mem, phys_end;
- void *user_mem;
- struct bio_vec vec;
- unsigned int transfered;
- struct bvec_iter iter;
-
- phys_mem = bank->io_addr + (bio->bi_iter.bi_sector <<
- AXON_RAM_SECTOR_SHIFT);
- phys_end = bank->io_addr + bank->size;
- transfered = 0;
- bio_for_each_segment(vec, bio, iter) {
- if (unlikely(phys_mem + vec.bv_len > phys_end)) {
- bio_io_error(bio);
- return BLK_QC_T_NONE;
- }
-
- user_mem = page_address(vec.bv_page) + vec.bv_offset;
- if (bio_data_dir(bio) == READ)
- memcpy(user_mem, (void *) phys_mem, vec.bv_len);
- else
- memcpy((void *) phys_mem, user_mem, vec.bv_len);
-
- phys_mem += vec.bv_len;
- transfered += vec.bv_len;
- }
- bio_endio(bio);
- return BLK_QC_T_NONE;
-}
-
-static const struct block_device_operations axon_ram_devops = {
- .owner = THIS_MODULE,
-};
-
-static long
-__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
- void **kaddr, pfn_t *pfn)
-{
- resource_size_t offset = pgoff * PAGE_SIZE;
-
- *kaddr = (void *) bank->io_addr + offset;
- *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
- return (bank->size - offset) / PAGE_SIZE;
-}
-
-static long
-axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
- void **kaddr, pfn_t *pfn)
-{
- struct axon_ram_bank *bank = dax_get_private(dax_dev);
-
- return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
-}
-
-static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
- void *addr, size_t bytes, struct iov_iter *i)
-{
- return copy_from_iter(addr, bytes, i);
-}
-
-static const struct dax_operations axon_ram_dax_ops = {
- .direct_access = axon_ram_dax_direct_access,
- .copy_from_iter = axon_ram_copy_from_iter,
-};
-
-/**
- * axon_ram_probe - probe() method for platform driver
- * @device: see platform_driver method
- */
-static int axon_ram_probe(struct platform_device *device)
-{
- static int axon_ram_bank_id = -1;
- struct axon_ram_bank *bank;
- struct resource resource;
- int rc;
-
- axon_ram_bank_id++;
-
- dev_info(&device->dev, "Found memory controller on %pOF\n",
- device->dev.of_node);
-
- bank = kzalloc(sizeof(*bank), GFP_KERNEL);
- if (!bank)
- return -ENOMEM;
-
- device->dev.platform_data = bank;
-
- bank->device = device;
-
- if (of_address_to_resource(device->dev.of_node, 0, &resource) != 0) {
- dev_err(&device->dev, "Cannot access device tree\n");
- rc = -EFAULT;
- goto failed;
- }
-
- bank->size = resource_size(&resource);
-
- if (bank->size == 0) {
- dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
- rc = -ENODEV;
- goto failed;
- }
-
- dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
-
- bank->ph_addr = resource.start;
- bank->io_addr = (unsigned long) ioremap_prot(
- bank->ph_addr, bank->size, _PAGE_NO_CACHE);
- if (bank->io_addr == 0) {
- dev_err(&device->dev, "ioremap() failed\n");
- rc = -EFAULT;
- goto failed;
- }
-
- bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
- if (bank->disk == NULL) {
- dev_err(&device->dev, "Cannot register disk\n");
- rc = -EFAULT;
- goto failed;
- }
-
-
- bank->disk->major = azfs_major;
- bank->disk->first_minor = azfs_minor;
- bank->disk->fops = &axon_ram_devops;
- bank->disk->private_data = bank;
-
- sprintf(bank->disk->disk_name, "%s%d",
- AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
-
- bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
- &axon_ram_dax_ops);
- if (!bank->dax_dev) {
- rc = -ENOMEM;
- goto failed;
- }
-
- bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
- if (bank->disk->queue == NULL) {
- dev_err(&device->dev, "Cannot register disk queue\n");
- rc = -EFAULT;
- goto failed;
- }
-
- set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
- blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
- blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
- device_add_disk(&device->dev, bank->disk);
-
- bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
- if (!bank->irq_id) {
- dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
- rc = -EFAULT;
- goto failed;
- }
-
- rc = request_irq(bank->irq_id, axon_ram_irq_handler,
- AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
- if (rc != 0) {
- dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
- bank->irq_id = 0;
- rc = -EFAULT;
- goto failed;
- }
-
- rc = device_create_file(&device->dev, &dev_attr_ecc);
- if (rc != 0) {
- dev_err(&device->dev, "Cannot create sysfs file\n");
- rc = -EFAULT;
- goto failed;
- }
-
- azfs_minor += bank->disk->minors;
-
- return 0;
-
-failed:
- if (bank->irq_id)
- free_irq(bank->irq_id, device);
- if (bank->disk != NULL) {
- if (bank->disk->major > 0)
- unregister_blkdev(bank->disk->major,
- bank->disk->disk_name);
- if (bank->disk->flags & GENHD_FL_UP)
- del_gendisk(bank->disk);
- put_disk(bank->disk);
- }
- kill_dax(bank->dax_dev);
- put_dax(bank->dax_dev);
- device->dev.platform_data = NULL;
- if (bank->io_addr != 0)
- iounmap((void __iomem *) bank->io_addr);
- kfree(bank);
- return rc;
-}
-
-/**
- * axon_ram_remove - remove() method for platform driver
- * @device: see of_platform_driver method
- */
-static int
-axon_ram_remove(struct platform_device *device)
-{
- struct axon_ram_bank *bank = device->dev.platform_data;
-
- BUG_ON(!bank || !bank->disk);
-
- device_remove_file(&device->dev, &dev_attr_ecc);
- free_irq(bank->irq_id, device);
- kill_dax(bank->dax_dev);
- put_dax(bank->dax_dev);
- del_gendisk(bank->disk);
- put_disk(bank->disk);
- iounmap((void __iomem *) bank->io_addr);
- kfree(bank);
-
- return 0;
-}
-
-static const struct of_device_id axon_ram_device_id[] = {
- {
- .type = "dma-memory"
- },
- {}
-};
-MODULE_DEVICE_TABLE(of, axon_ram_device_id);
-
-static struct platform_driver axon_ram_driver = {
- .probe = axon_ram_probe,
- .remove = axon_ram_remove,
- .driver = {
- .name = AXON_RAM_MODULE_NAME,
- .of_match_table = axon_ram_device_id,
- },
-};
-
-/**
- * axon_ram_init
- */
-static int __init
-axon_ram_init(void)
-{
- azfs_major = register_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
- if (azfs_major < 0) {
- printk(KERN_ERR "%s cannot become block device major number\n",
- AXON_RAM_MODULE_NAME);
- return -EFAULT;
- }
- azfs_minor = 0;
-
- return platform_driver_register(&axon_ram_driver);
-}
-
-/**
- * axon_ram_exit
- */
-static void __exit
-axon_ram_exit(void)
-{
- platform_driver_unregister(&axon_ram_driver);
- unregister_blkdev(azfs_major, AXON_RAM_DEVICE_NAME);
-}
-
-module_init(axon_ram_init);
-module_exit(axon_ram_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
-MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index c6f154b602fb..5240d3a74a10 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -629,8 +629,9 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add16(struct device_node *np)
+int cpm1_gpiochip_add16(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio16_chip *cpm1_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -660,6 +661,8 @@ int cpm1_gpiochip_add16(struct device_node *np)
gc->get = cpm1_gpio16_get;
gc->set = cpm1_gpio16_set;
gc->to_irq = cpm1_gpio16_to_irq;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
}
@@ -755,8 +758,9 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add32(struct device_node *np)
+int cpm1_gpiochip_add32(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio32_chip *cpm1_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -776,31 +780,10 @@ int cpm1_gpiochip_add32(struct device_node *np)
gc->direction_output = cpm1_gpio32_dir_out;
gc->get = cpm1_gpio32_get;
gc->set = cpm1_gpio32_set;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
}
-static int cpm_init_par_io(void)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a")
- cpm1_gpiochip_add16(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b")
- cpm1_gpiochip_add32(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c")
- cpm1_gpiochip_add16(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d")
- cpm1_gpiochip_add16(np);
-
- /* Port E uses CPM2 layout */
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e")
- cpm2_gpiochip_add32(np);
- return 0;
-}
-arch_initcall(cpm_init_par_io);
-
#endif /* CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
index f78ff841652c..07718b9a2c99 100644
--- a/arch/powerpc/sysdev/cpm2.c
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -354,14 +354,3 @@ void cpm2_set_pin(int port, int pin, int flags)
else
clrbits32(&iop[port].odr, pin);
}
-
-static int cpm_init_par_io(void)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank")
- cpm2_gpiochip_add32(np);
- return 0;
-}
-arch_initcall(cpm_init_par_io);
-
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 51bf749a4f3a..b74508175b67 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -190,8 +190,9 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm2_gpiochip_add32(struct device_node *np)
+int cpm2_gpiochip_add32(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm2_gpio32_chip *cpm2_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
@@ -211,6 +212,8 @@ int cpm2_gpiochip_add32(struct device_node *np)
gc->direction_output = cpm2_gpio32_dir_out;
gc->get = cpm2_gpio32_get;
gc->set = cpm2_gpio32_set;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc);
}
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
new file mode 100644
index 000000000000..0badc90be666
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common CPM GPIO wrapper for the CPM GPIO ports
+ *
+ * Author: Christophe Leroy <christophe.leroy@c-s.fr>
+ *
+ * Copyright 2017 CS Systemes d'Information.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <asm/cpm.h>
+#ifdef CONFIG_8xx_GPIO
+#include <asm/cpm1.h>
+#endif
+
+static int cpm_gpio_probe(struct platform_device *ofdev)
+{
+ struct device *dev = &ofdev->dev;
+ int (*gp_add)(struct device *dev) = of_device_get_match_data(dev);
+
+ if (!gp_add)
+ return -ENODEV;
+
+ return gp_add(dev);
+}
+
+static const struct of_device_id cpm_gpio_match[] = {
+#ifdef CONFIG_8xx_GPIO
+ {
+ .compatible = "fsl,cpm1-pario-bank-a",
+ .data = cpm1_gpiochip_add16,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-b",
+ .data = cpm1_gpiochip_add32,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-c",
+ .data = cpm1_gpiochip_add16,
+ },
+ {
+ .compatible = "fsl,cpm1-pario-bank-d",
+ .data = cpm1_gpiochip_add16,
+ },
+ /* Port E uses CPM2 layout */
+ {
+ .compatible = "fsl,cpm1-pario-bank-e",
+ .data = cpm2_gpiochip_add32,
+ },
+#endif
+ {
+ .compatible = "fsl,cpm2-pario-bank",
+ .data = cpm2_gpiochip_add32,
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, cpm_gpio_match);
+
+static struct platform_driver cpm_gpio_driver = {
+ .probe = cpm_gpio_probe,
+ .driver = {
+ .name = "cpm-gpio",
+ .owner = THIS_MODULE,
+ .of_match_table = cpm_gpio_match,
+ },
+};
+
+static int __init cpm_gpio_init(void)
+{
+ return platform_driver_register(&cpm_gpio_driver);
+}
+arch_initcall(cpm_gpio_init);
+
+MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>");
+MODULE_DESCRIPTION("Driver for CPM GPIO");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cpm-gpio");
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
index 1707bf04dec6..94278e8af192 100644
--- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct timeval interval;
- int val = 0;
+ time64_t interval = 0;
mutex_lock(&sysfs_lock);
if (fsl_wakeup->timer) {
mpic_get_remain_time(fsl_wakeup->timer, &interval);
- val = interval.tv_sec + 1;
+ interval++;
}
mutex_unlock(&sysfs_lock);
- return sprintf(buf, "%d\n", val);
+ return sprintf(buf, "%lld\n", interval);
}
static ssize_t fsl_timer_wakeup_store(struct device *dev,
@@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
const char *buf,
size_t count)
{
- struct timeval interval;
+ time64_t interval;
int ret;
- interval.tv_usec = 0;
- if (kstrtol(buf, 0, &interval.tv_sec))
+ if (kstrtoll(buf, 0, &interval))
return -EINVAL;
mutex_lock(&sysfs_lock);
@@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
fsl_wakeup->timer = NULL;
}
- if (!interval.tv_sec) {
+ if (!interval) {
mutex_unlock(&sysfs_lock);
return count;
}
fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
- fsl_wakeup, &interval);
+ fsl_wakeup, interval);
if (!fsl_wakeup->timer) {
mutex_unlock(&sysfs_lock);
return -EINVAL;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 61e07c78d64f..918be816b097 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -448,7 +448,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
#endif
/* adjusting outbound windows could reclaim space in mem map */
if (paddr_hi < 0xffffffffull)
- pr_warning("%pOF: WARNING: Outbound window cfg leaves "
+ pr_warn("%pOF: WARNING: Outbound window cfg leaves "
"gaps in memory map. Adjusting the memory map "
"could reduce unnecessary bounce buffering.\n",
hose->dn);
@@ -531,7 +531,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
dev = pdev->dev.of_node;
if (!of_device_is_available(dev)) {
- pr_warning("%pOF: disabled\n", dev);
+ pr_warn("%pOF: disabled\n", dev);
return -ENODEV;
}
@@ -808,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
is_mpc83xx_pci = 1;
if (!of_device_is_available(dev)) {
- pr_warning("%pOF: disabled by the firmware.\n",
- dev);
+ pr_warn("%pOF: disabled by the firmware.\n",
+ dev);
return -ENODEV;
}
pr_debug("Adding PCI host bridge %pOF\n", dev);
@@ -1070,7 +1070,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
if (is_in_pci_mem_space(addr)) {
if (user_mode(regs)) {
pagefault_disable();
- ret = get_user(regs->nip, &inst);
+ ret = get_user(inst, (__u32 __user *)regs->nip);
pagefault_enable();
} else {
ret = probe_kernel_address((void *)regs->nip, inst);
@@ -1304,10 +1304,8 @@ static int add_err_dev(struct platform_device *pdev)
pdev->resource,
pdev->num_resources,
&pd, sizeof(pd));
- if (IS_ERR(errdev))
- return PTR_ERR(errdev);
- return 0;
+ return PTR_ERR_OR_ZERO(errdev);
}
static int fsl_pci_probe(struct platform_device *pdev)
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index ead3e2549ebf..73067805300a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1008,9 +1008,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
if (hw == mpic->spurious_vec)
return -EINVAL;
if (mpic->protected && test_bit(hw, mpic->protected)) {
- pr_warning("mpic: Mapping of source 0x%x failed, "
- "source protected by firmware !\n",\
- (unsigned int)hw);
+ pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n",
+ (unsigned int)hw);
return -EPERM;
}
@@ -1040,9 +1039,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq,
return 0;
if (hw >= mpic->num_sources) {
- pr_warning("mpic: Mapping of source 0x%x failed, "
- "source out of range !\n",\
- (unsigned int)hw);
+ pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n",
+ (unsigned int)hw);
return -EINVAL;
}
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index a418579591be..87e7c42777a8 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -47,9 +47,6 @@
#define MAX_TICKS_CASCADE (~0U)
#define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num))
-/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */
-#define ONE_SECOND 1000000
-
struct timer_regs {
u32 gtccr;
u32 res0[3];
@@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = {
static LIST_HEAD(timer_group_list);
static void convert_ticks_to_time(struct timer_group_priv *priv,
- const u64 ticks, struct timeval *time)
+ const u64 ticks, time64_t *time)
{
- u64 tmp_sec;
-
- time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq);
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
-
- time->tv_usec = 0;
-
- if (tmp_sec <= ticks)
- time->tv_usec = (__kernel_suseconds_t)
- div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq);
-
- return;
+ *time = (u64)div_u64(ticks, priv->timerfreq);
}
/* the time set by the user is converted to "ticks" */
static int convert_time_to_ticks(struct timer_group_priv *priv,
- const struct timeval *time, u64 *ticks)
+ time64_t time, u64 *ticks)
{
u64 max_value; /* prevent u64 overflow */
- u64 tmp = 0;
-
- u64 tmp_sec;
- u64 tmp_ms;
- u64 tmp_us;
max_value = div_u64(ULLONG_MAX, priv->timerfreq);
- if (time->tv_sec > max_value ||
- (time->tv_sec == max_value && time->tv_usec > 0))
+ if (time > max_value)
return -EINVAL;
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
- tmp += tmp_sec;
-
- tmp_ms = time->tv_usec / 1000;
- tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000);
- tmp += tmp_ms;
-
- tmp_us = time->tv_usec % 1000;
- tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000);
- tmp += tmp_us;
-
- *ticks = tmp;
+ *ticks = (u64)time * (u64)priv->timerfreq;
return 0;
}
@@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
return allocated_timer;
}
-static struct mpic_timer *get_timer(const struct timeval *time)
+static struct mpic_timer *get_timer(time64_t time)
{
struct timer_group_priv *priv;
struct mpic_timer *timer;
@@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time)
* @handle: the timer to be started.
*
* It will do ->fn(->dev) callback from the hardware interrupt at
- * the ->timeval point in the future.
+ * the 'time64_t' point in the future.
*/
void mpic_start_timer(struct mpic_timer *handle)
{
@@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer);
*
* Query timer remaining time.
*/
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time)
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
{
struct timer_group_priv *priv = container_of(handle,
struct timer_group_priv, timer[handle->num]);
@@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer);
* else "handle" on success.
*/
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time)
+ time64_t time)
{
struct mpic_timer *allocated_timer;
int ret;
@@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
if (list_empty(&timer_group_list))
return NULL;
- if (!(time->tv_sec + time->tv_usec) ||
- time->tv_sec < 0 || time->tv_usec < 0)
- return NULL;
-
- if (time->tv_usec > ONE_SECOND)
+ if (time < 0)
return NULL;
allocated_timer = get_timer(time);
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index d52b3b81e05f..50c411b1761e 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -73,7 +73,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
.attr = {
.name = "hs_reg",
- .mode = S_IRUGO | S_IWUSR,
+ .mode = 0644,
},
.size = MV64X60_VAL_LEN_MAX,
.read = mv64x60_hs_reg_read,
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 2bfb9968d562..1459f4e8b698 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -241,18 +241,16 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
cpu, hw_id);
if (!request_mem_region(addr, size, rname)) {
- pr_warning("icp_native: Could not reserve ICP MMIO"
- " for CPU %d, interrupt server #0x%x\n",
- cpu, hw_id);
+ pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+ cpu, hw_id);
return -EBUSY;
}
icp_native_regs[cpu] = ioremap(addr, size);
kvmppc_set_xics_phys(cpu, addr);
if (!icp_native_regs[cpu]) {
- pr_warning("icp_native: Failed ioremap for CPU %d, "
- "interrupt server #0x%x, addr %#lx\n",
- cpu, hw_id, addr);
+ pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+ cpu, hw_id, addr);
release_mem_region(addr, size);
return -ENOMEM;
}
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 1c6bf4b66f56..f85f916ba432 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -131,8 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d,
wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
if (wanted_server < 0) {
- pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
- __func__, cpumask_pr_args(cpumask), d->irq);
+ pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+ __func__, cpumask_pr_args(cpumask), d->irq);
return -1;
}
server = ics_opal_mangle_server(wanted_server);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 42e0c56ff81c..6aabc74688a6 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -141,8 +141,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
irq_server = xics_get_irq_server(d->irq, cpumask, 1);
if (irq_server == -1) {
- pr_warning("%s: No online cpus in the mask %*pb for irq %d\n",
- __func__, cpumask_pr_args(cpumask), d->irq);
+ pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+ __func__, cpumask_pr_args(cpumask), d->irq);
return -1;
}
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index ffe138b8b9dc..77e864d5506d 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -243,8 +243,8 @@ void xics_migrate_irqs_away(void)
/* This is expected during cpu offline. */
if (cpu_online(cpu))
- pr_warning("IRQ %u affinity broken off cpu %u\n",
- virq, cpu);
+ pr_warn("IRQ %u affinity broken off cpu %u\n",
+ virq, cpu);
/* Reset affinity to all cpus */
raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -466,7 +466,7 @@ void __init xics_init(void)
rc = icp_opal_init();
}
if (rc < 0) {
- pr_warning("XICS: Cannot find a Presentation Controller !\n");
+ pr_warn("XICS: Cannot find a Presentation Controller !\n");
return;
}
@@ -481,7 +481,7 @@ void __init xics_init(void)
if (rc < 0)
rc = ics_opal_init();
if (rc < 0)
- pr_warning("XICS: Cannot find a Source Controller !\n");
+ pr_warn("XICS: Cannot find a Source Controller !\n");
/* Initialize common bits */
xics_get_server_size();
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a3b8d7d1316e..40c06110821c 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
* EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest
*/
- if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+ !(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
/*
@@ -1269,11 +1270,6 @@ static void xive_setup_cpu(void)
{
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
- /* Debug: Dump the TM state */
- pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
- smp_processor_id(), hard_smp_processor_id(),
- in_8(xive_tima + xive_tima_offset + TM_WORD2));
-
/* The backend might have additional things to do */
if (xive_ops->setup_cpu)
xive_ops->setup_cpu(smp_processor_id(), xc);
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 31db8c072acd..9deea5ee13f6 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -93,10 +93,6 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect)
{
const struct powerpc_opcode *opcode;
const struct powerpc_opcode *opcode_end;
- unsigned long op;
-
- /* Get the major opcode of the instruction. */
- op = PPC_OP (insn);
opcode_end = powerpc_opcodes + powerpc_num_opcodes;
/* Find the first match in the opcode table for this major opcode. */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0ddc7ac6c5f1..82e1a3ee6e0f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp)
printf(" current = 0x%lx\n", current);
#ifdef CONFIG_PPC64
printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n",
- local_paca, local_paca->soft_enabled, local_paca->irq_happened);
+ local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
#endif
if (current) {
printf(" pid = %ld, comm = %s\n",
@@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu)
printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]);
DUMP(p, rfi_flush_fallback_area, "px");
- DUMP(p, l1d_flush_congruence, "llx");
- DUMP(p, l1d_flush_sets, "llx");
#endif
DUMP(p, dscr_default, "llx");
#ifdef CONFIG_PPC_BOOK3E
@@ -2395,7 +2393,7 @@ static void dump_one_paca(int cpu)
DUMP(p, stab_rr, "lx");
DUMP(p, saved_r1, "lx");
DUMP(p, trap_save, "x");
- DUMP(p, soft_enabled, "x");
+ DUMP(p, irq_soft_mask, "x");
DUMP(p, irq_happened, "x");
DUMP(p, io_sync, "x");
DUMP(p, irq_work_pending, "x");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e06605b21841..1a8234e706bc 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -76,6 +76,8 @@ static int snooze_loop(struct cpuidle_device *dev,
ppc64_runlatch_on();
clear_thread_flag(TIF_POLLING_NRFLAG);
+ local_irq_disable();
+
return index;
}
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a187a39fb866..9e56bc411061 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -51,8 +51,6 @@ static inline void idle_loop_epilog(unsigned long in_purr)
get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
get_lppaca()->idle = 0;
- if (irqs_disabled())
- local_irq_enable();
ppc64_runlatch_on();
}
@@ -87,6 +85,8 @@ static int snooze_loop(struct cpuidle_device *dev,
HMT_medium();
clear_thread_flag(TIF_POLLING_NRFLAG);
+ local_irq_disable();
+
idle_loop_epilog(in_purr);
return index;
@@ -121,6 +121,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev,
HMT_medium();
check_and_cede_processor();
+ local_irq_disable();
get_lppaca()->donate_dedicated_cpu = 0;
idle_loop_epilog(in_purr);
@@ -145,6 +146,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
*/
check_and_cede_processor();
+ local_irq_disable();
idle_loop_epilog(in_purr);
return index;
@@ -172,11 +174,17 @@ static struct cpuidle_state dedicated_states[] = {
* States for shared partition case.
*/
static struct cpuidle_state shared_states[] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
{ /* Shared Cede */
.name = "Shared Cede",
.desc = "Shared Cede",
- .exit_latency = 0,
- .target_residency = 0,
+ .exit_latency = 10,
+ .target_residency = 100,
.enter = &shared_cede_loop },
};
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 289800b5235d..4c8097e0e6fe 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -203,18 +203,17 @@ static int adb_scan_bus(void)
}
/* Now fill in the handler_id field of the adb_handler entries. */
- printk(KERN_DEBUG "adb devices:");
+ pr_debug("adb devices:\n");
for (i = 1; i < 16; i++) {
if (adb_handler[i].original_address == 0)
continue;
adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
(i << 4) | 0xf);
adb_handler[i].handler_id = req.reply[2];
- printk(" [%d]: %d %x", i, adb_handler[i].original_address,
- adb_handler[i].handler_id);
+ pr_debug(" [%d]: %d %x\n", i, adb_handler[i].original_address,
+ adb_handler[i].handler_id);
devmask |= 1 << i;
}
- printk("\n");
return devmask;
}
@@ -225,9 +224,9 @@ static int adb_scan_bus(void)
static int
adb_probe_task(void *x)
{
- printk(KERN_INFO "adb: starting probe task...\n");
+ pr_debug("adb: starting probe task...\n");
do_adb_reset_bus();
- printk(KERN_INFO "adb: finished probe task...\n");
+ pr_debug("adb: finished probe task...\n");
up(&adb_probe_mutex);
@@ -337,7 +336,7 @@ static int __init adb_init(void)
adb_controller->init())
adb_controller = NULL;
if (adb_controller == NULL) {
- printk(KERN_WARNING "Warning: no ADB interface detected\n");
+ pr_warn("Warning: no ADB interface detected\n");
} else {
#ifdef CONFIG_PPC
if (of_machine_is_compatible("AAPL,PowerBook1998") ||
@@ -480,8 +479,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
(!handler_id || (handler_id == adb_handler[i].handler_id) ||
try_handler_change(i, handler_id))) {
if (adb_handler[i].handler != 0) {
- printk(KERN_ERR
- "Two handlers for ADB device %d\n",
+ pr_err("Two handlers for ADB device %d\n",
default_id);
continue;
}
@@ -535,10 +533,10 @@ adb_input(unsigned char *buf, int nb, int autopoll)
id = buf[0] >> 4;
if (dump_adb_input) {
- printk(KERN_INFO "adb packet: ");
+ pr_info("adb packet: ");
for (i = 0; i < nb; ++i)
- printk(" %x", buf[i]);
- printk(", id = %d\n", id);
+ pr_cont(" %x", buf[i]);
+ pr_cont(", id = %d\n", id);
}
write_lock_irqsave(&adb_handler_lock, flags);
handler = adb_handler[id].handler;
@@ -884,7 +882,7 @@ static void __init
adbdev_init(void)
{
if (register_chrdev(ADB_MAJOR, "adb", &adb_fops)) {
- printk(KERN_ERR "adb: unable to get major %d\n", ADB_MAJOR);
+ pr_err("adb: unable to get major %d\n", ADB_MAJOR);
return;
}
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index e091193104f7..a261892c03b3 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -268,7 +268,7 @@ adbhid_keyboard_input(unsigned char *data, int nb, int apoll)
int id = (data[0] >> 4) & 0x0f;
if (!adbhid[id]) {
- printk(KERN_ERR "ADB HID on ID %d not yet registered, packet %#02x, %#02x, %#02x, %#02x\n",
+ pr_err("ADB HID on ID %d not yet registered, packet %#02x, %#02x, %#02x, %#02x\n",
id, data[0], data[1], data[2], data[3]);
return;
}
@@ -320,8 +320,7 @@ adbhid_input_keycode(int id, int scancode, int repeat)
ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE;
}
} else {
- printk(KERN_INFO "Spurious caps lock event "
- "(scancode 0xff).\n");
+ pr_info("Spurious caps lock event (scancode 0xff).\n");
}
}
}
@@ -397,8 +396,8 @@ adbhid_input_keycode(int id, int scancode, int repeat)
input_report_key(adbhid[id]->input, key, !up_flag);
input_sync(adbhid[id]->input);
} else
- printk(KERN_INFO "Unhandled ADB key (scancode %#02x) %s.\n", keycode,
- up_flag ? "released" : "pressed");
+ pr_info("Unhandled ADB key (scancode %#02x) %s.\n", keycode,
+ up_flag ? "released" : "pressed");
}
@@ -408,7 +407,7 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
int id = (data[0] >> 4) & 0x0f;
if (!adbhid[id]) {
- printk(KERN_ERR "ADB HID on ID %d not yet registered\n", id);
+ pr_err("ADB HID on ID %d not yet registered\n", id);
return;
}
@@ -506,7 +505,7 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
int id = (data[0] >> 4) & 0x0f;
if (!adbhid[id]) {
- printk(KERN_ERR "ADB HID on ID %d not yet registered\n", id);
+ pr_err("ADB HID on ID %d not yet registered\n", id);
return;
}
@@ -534,8 +533,8 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
break;
default:
- printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
- data[0], data[1], data[2], data[3]);
+ pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+ data[0], data[1], data[2], data[3]);
break;
}
}
@@ -609,14 +608,14 @@ adbhid_buttons_input(unsigned char *data, int nb, int autopoll)
break;
default:
- printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
- data[0], data[1], data[2], data[3]);
+ pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+ data[0], data[1], data[2], data[3]);
break;
}
break;
default:
- printk(KERN_INFO "Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
- data[0], data[1], data[2], data[3]);
+ pr_info("Unhandled ADB_MISC event %02x, %02x, %02x, %02x\n",
+ data[0], data[1], data[2], data[3]);
break;
}
}
@@ -760,7 +759,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
int i;
if (adbhid[id]) {
- printk(KERN_ERR "Trying to reregister ADB HID on ID %d\n", id);
+ pr_err("Trying to reregister ADB HID on ID %d\n", id);
return -EEXIST;
}
@@ -799,24 +798,24 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
memcpy(hid->keycode, adb_to_linux_keycodes, sizeof(adb_to_linux_keycodes));
- printk(KERN_INFO "Detected ADB keyboard, type ");
+ pr_info("Detected ADB keyboard, type ");
switch (original_handler_id) {
default:
- printk("<unknown>.\n");
+ pr_cont("<unknown>.\n");
input_dev->id.version = ADB_KEYBOARD_UNKNOWN;
break;
case 0x01: case 0x02: case 0x03: case 0x06: case 0x08:
case 0x0C: case 0x10: case 0x18: case 0x1B: case 0x1C:
case 0xC0: case 0xC3: case 0xC6:
- printk("ANSI.\n");
+ pr_cont("ANSI.\n");
input_dev->id.version = ADB_KEYBOARD_ANSI;
break;
case 0x04: case 0x05: case 0x07: case 0x09: case 0x0D:
case 0x11: case 0x14: case 0x19: case 0x1D: case 0xC1:
case 0xC4: case 0xC7:
- printk("ISO, swapping keys.\n");
+ pr_cont("ISO, swapping keys.\n");
input_dev->id.version = ADB_KEYBOARD_ISO;
i = hid->keycode[10];
hid->keycode[10] = hid->keycode[50];
@@ -825,7 +824,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
case 0x12: case 0x15: case 0x16: case 0x17: case 0x1A:
case 0x1E: case 0xC2: case 0xC5: case 0xC8: case 0xC9:
- printk("JIS.\n");
+ pr_cont("JIS.\n");
input_dev->id.version = ADB_KEYBOARD_JIS;
break;
}
@@ -884,7 +883,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
/* else fall through */
default:
- printk(KERN_INFO "Trying to register unknown ADB device to input layer.\n");
+ pr_info("Trying to register unknown ADB device to input layer.\n");
err = -ENODEV;
goto fail;
}
@@ -1073,12 +1072,12 @@ adbhid_probe(void)
(req.reply[1] == 0x4b) && (req.reply[2] == 0x4f) &&
(req.reply[3] == 0x49) && (req.reply[4] == 0x54)) {
if (adb_try_handler_change(id, 0x42)) {
- printk("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
+ pr_cont("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
mouse_kind = ADBMOUSE_MACALLY2;
}
}
}
- printk("\n");
+ pr_cont("\n");
adb_get_infos(id, &default_id, &cur_handler_id);
reg |= adbhid_input_reregister(id, default_id, org_handler_id,
@@ -1093,12 +1092,12 @@ init_trackpad(int id)
struct adb_request req;
unsigned char r1_buffer[8];
- printk(" (trackpad)");
+ pr_cont(" (trackpad)");
adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
ADB_READREG(id,1));
if (req.reply_len < 8)
- printk("bad length for reg. 1\n");
+ pr_cont("bad length for reg. 1\n");
else
{
memcpy(r1_buffer, &req.reply[1], 8);
@@ -1146,7 +1145,7 @@ init_trackball(int id)
{
struct adb_request req;
- printk(" (trackman/mouseman)");
+ pr_cont(" (trackman/mouseman)");
adb_request(&req, NULL, ADBREQ_SYNC, 3,
ADB_WRITEREG(id,1), 00,0x81);
@@ -1178,7 +1177,7 @@ init_turbomouse(int id)
{
struct adb_request req;
- printk(" (TurboMouse 5)");
+ pr_cont(" (TurboMouse 5)");
adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
@@ -1214,7 +1213,7 @@ init_microspeed(int id)
{
struct adb_request req;
- printk(" (Microspeed/MacPoint or compatible)");
+ pr_cont(" (Microspeed/MacPoint or compatible)");
adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
@@ -1254,7 +1253,7 @@ init_ms_a3(int id)
{
struct adb_request req;
- printk(" (Mouse Systems A3 Mouse, or compatible)");
+ pr_cont(" (Mouse Systems A3 Mouse, or compatible)");
adb_request(&req, NULL, ADBREQ_SYNC, 3,
ADB_WRITEREG(id, 0x2),
0x00,
diff --git a/drivers/macintosh/ams/ams-input.c b/drivers/macintosh/ams/ams-input.c
index 2edae7dfcab2..fe248f6a30c1 100644
--- a/drivers/macintosh/ams/ams-input.c
+++ b/drivers/macintosh/ams/ams-input.c
@@ -91,7 +91,7 @@ static int ams_input_enable(void)
return error;
}
- joystick = 1;
+ joystick = true;
return 0;
}
@@ -104,7 +104,7 @@ static void ams_input_disable(void)
ams_info.idev = NULL;
}
- joystick = 0;
+ joystick = false;
}
static ssize_t ams_input_show_joystick(struct device *dev,
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index f433521a6f9d..d7cd5afa38cd 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -230,7 +230,7 @@ static void update_fans_speed (struct thermostat *th)
/* we don't care about local sensor, so we start at sensor 1 */
for (i = 1; i < 3; i++) {
- int started = 0;
+ bool started = false;
int fan_number = (th->type == ADT7460 && i == 2);
int var = th->temps[i] - th->limits[i];
@@ -243,7 +243,7 @@ static void update_fans_speed (struct thermostat *th)
if (abs(var - th->last_var[fan_number]) < 2)
continue;
- started = 1;
+ started = true;
new_speed = fan_speed + ((var-1)*step);
if (new_speed < fan_speed)
diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c
index 89ed51571b62..50ada02ae75d 100644
--- a/drivers/macintosh/via-pmu-backlight.c
+++ b/drivers/macintosh/via-pmu-backlight.c
@@ -137,7 +137,7 @@ void pmu_backlight_set_sleep(int sleep)
}
#endif /* CONFIG_PM */
-void __init pmu_backlight_init()
+void __init pmu_backlight_init(void)
{
struct backlight_properties props;
struct backlight_device *bd;
diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c
index 96d16fca68b2..fec91db1142e 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -96,14 +96,14 @@ static int cpu_last_target;
static struct wf_pid_state backside_pid;
static int backside_tick;
static struct wf_pid_state slots_pid;
-static int slots_started;
+static bool slots_started;
static struct wf_pid_state drive_bay_pid;
static int drive_bay_tick;
static int nr_cores;
static int have_all_controls;
static int have_all_sensors;
-static int started;
+static bool started;
static int failure_state;
#define FAILURE_SENSOR 1
@@ -462,7 +462,7 @@ static void slots_fan_tick(void)
/* first time; initialize things */
printk(KERN_INFO "windfarm: Slots control loop started.\n");
wf_pid_init(&slots_pid, &slots_param);
- slots_started = 1;
+ slots_started = true;
}
err = slots_power->ops->get_value(slots_power, &power);
@@ -506,7 +506,7 @@ static void pm112_tick(void)
int i, last_failure;
if (!started) {
- started = 1;
+ started = true;
printk(KERN_INFO "windfarm: CPUs control loops started.\n");
for (i = 0; i < nr_cores; ++i) {
if (create_cpu_loop(i) < 0) {
diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index b350fb86ff08..4d72d8f58cb6 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -246,7 +246,8 @@ enum {
static struct wf_control *controls[N_CONTROLS] = {};
/* Set to kick the control loop into life */
-static int pm121_all_controls_ok, pm121_all_sensors_ok, pm121_started;
+static int pm121_all_controls_ok, pm121_all_sensors_ok;
+static bool pm121_started;
enum {
FAILURE_FAN = 1 << 0,
@@ -806,7 +807,7 @@ static void pm121_tick(void)
pm121_create_sys_fans(i);
pm121_create_cpu_fans();
- pm121_started = 1;
+ pm121_started = true;
}
/* skipping ticks */
diff --git a/drivers/macintosh/windfarm_pm72.c b/drivers/macintosh/windfarm_pm72.c
index e88cfb36a74d..833021508c05 100644
--- a/drivers/macintosh/windfarm_pm72.c
+++ b/drivers/macintosh/windfarm_pm72.c
@@ -611,7 +611,7 @@ static void pm72_tick(void)
int i, last_failure;
if (!started) {
- started = 1;
+ started = true;
printk(KERN_INFO "windfarm: CPUs control loops started.\n");
for (i = 0; i < nr_chips; ++i) {
if (cpu_setup_pid(i) < 0) {
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index 93faf298a3c5..d9ea45581b9e 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -140,7 +140,8 @@ static struct wf_control *fan_system;
static struct wf_control *cpufreq_clamp;
/* Set to kick the control loop into life */
-static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started;
+static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok;
+static bool wf_smu_started;
/* Failure handling.. could be nicer */
#define FAILURE_FAN 0x01
@@ -549,7 +550,7 @@ static void wf_smu_tick(void)
DBG("wf: creating control loops !\n");
wf_smu_create_sys_fans();
wf_smu_create_cpu_fans();
- wf_smu_started = 1;
+ wf_smu_started = true;
}
/* Skipping ticks */
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 81fdf40c5b82..7fd73dcb2b0a 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -75,7 +75,8 @@ static struct wf_control *fan_slots;
static struct wf_control *cpufreq_clamp;
/* Set to kick the control loop into life */
-static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok, wf_smu_started;
+static int wf_smu_all_controls_ok, wf_smu_all_sensors_ok;
+static bool wf_smu_started;
static bool wf_smu_overtemp;
/* Failure handling.. could be nicer */
@@ -467,7 +468,7 @@ static void wf_smu_tick(void)
wf_smu_create_drive_fans();
wf_smu_create_slots_fans();
wf_smu_create_cpu_fans();
- wf_smu_started = 1;
+ wf_smu_started = true;
}
/* Skipping ticks */
diff --git a/drivers/macintosh/windfarm_rm31.c b/drivers/macintosh/windfarm_rm31.c
index a0cd9c7f9835..9ce87cc0597f 100644
--- a/drivers/macintosh/windfarm_rm31.c
+++ b/drivers/macintosh/windfarm_rm31.c
@@ -514,7 +514,7 @@ static void rm31_tick(void)
int i, last_failure;
if (!started) {
- started = 1;
+ started = true;
printk(KERN_INFO "windfarm: CPUs control loops started.\n");
for (i = 0; i < nr_chips; ++i) {
if (cpu_setup_pid(i) < 0) {
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 6722073e339b..03605f8fc0dc 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -512,5 +512,6 @@ source "drivers/misc/mic/Kconfig"
source "drivers/misc/genwqe/Kconfig"
source "drivers/misc/echo/Kconfig"
source "drivers/misc/cxl/Kconfig"
+source "drivers/misc/ocxl/Kconfig"
source "drivers/misc/cardreader/Kconfig"
endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 8d8cc096063b..c3c8624f4d95 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -55,7 +55,8 @@ obj-$(CONFIG_CXL_BASE) += cxl/
obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o
obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o
-obj-$(CONFIG_MISC_RTSX) += cardreader/
+obj-$(CONFIG_OCXL) += ocxl/
+obj-$(CONFIG_MISC_RTSX) += cardreader/
lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o
lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2753f0..7ff315ad3692 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
ctx->pid = NULL; /* Set in start work ioctl */
mutex_init(&ctx->mapping_lock);
ctx->mapping = NULL;
+ ctx->tidr = 0;
+ ctx->assign_tidr = false;
if (cxl_is_power8()) {
spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a798c2ccd67d..4f015da78f28 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
struct list_head extra_irq_contexts;
struct mm_struct *mm;
+
+ u16 tidr;
+ bool assign_tidr;
};
struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..30ccba436b3b 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
*/
attr->pid = mm->context.id;
mmput(mm);
+ attr->tid = task->thread.tidr;
} else {
attr->pid = 0;
+ attr->tid = 0;
}
- attr->tid = 0;
return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 90341ccda9bd..0162516f5e57 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
* flags are set it's invalid
*/
if (work.reserved1 || work.reserved2 || work.reserved3 ||
- work.reserved4 || work.reserved5 || work.reserved6 ||
+ work.reserved4 || work.reserved5 ||
(work.flags & ~CXL_START_WORK_ALL)) {
rc = -EINVAL;
goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
rc = -EINVAL;
goto out;
}
+
if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
goto out;
if (work.flags & CXL_START_WORK_AMR)
amr = work.amr & mfspr(SPRN_UAMOR);
+ if (work.flags & CXL_START_WORK_TID)
+ ctx->assign_tidr = true;
+
ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
goto out;
}
- ctx->status = STARTED;
rc = 0;
+ if (work.flags & CXL_START_WORK_TID) {
+ work.tid = ctx->tidr;
+ if (copy_to_user(uwork, &work, sizeof(work)))
+ rc = -EFAULT;
+ }
+
+ ctx->status = STARTED;
+
out:
mutex_unlock(&ctx->status_mutex);
return rc;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 02b6b45b4c20..1b3d7c65ea3f 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <asm/synch.h>
+#include <asm/switch_to.h>
#include <misc/cxl-base.h>
#include "cxl.h"
@@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx)
static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
{
u32 pid;
+ int rc;
cxl_assign_psn_space(ctx);
@@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
pid = ctx->mm->context.id;
}
- ctx->elem->common.tid = 0;
+ /* Assign a unique TIDR (thread id) for the current thread */
+ if (!(ctx->tidr) && (ctx->assign_tidr)) {
+ rc = set_thread_tidr(current);
+ if (rc)
+ return -ENODEV;
+ ctx->tidr = current->thread.tidr;
+ pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+ }
+
+ ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
ctx->elem->common.pid = cpu_to_be32(pid);
ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..758842f65a1b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -125,8 +125,6 @@ static const struct pci_device_id cxl_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
- { PCI_DEVICE_CLASS(0x120000, ~0), },
-
{ }
};
MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
new file mode 100644
index 000000000000..4bbdb0d3c8ee
--- /dev/null
+++ b/drivers/misc/ocxl/Kconfig
@@ -0,0 +1,31 @@
+#
+# Open Coherent Accelerator (OCXL) compatible devices
+#
+
+config OCXL_BASE
+ bool
+ default n
+ select PPC_COPRO_BASE
+
+config OCXL
+ tristate "OpenCAPI coherent accelerator support"
+ depends on PPC_POWERNV && PCI && EEH
+ select OCXL_BASE
+ default m
+ help
+ Select this option to enable the ocxl driver for Open
+ Coherent Accelerator Processor Interface (OpenCAPI) devices.
+
+ OpenCAPI allows FPGA and ASIC accelerators to be coherently
+ attached to a CPU over an OpenCAPI link.
+
+ The ocxl driver enables userspace programs to access these
+ accelerators through devices in /dev/ocxl/.
+
+ For more information, see http://opencapi.org.
+
+ This is not to be confused with the support for IBM CAPI
+ accelerators (CONFIG_CXL), which are PCI-based instead of a
+ dedicated OpenCAPI link, and don't follow the same protocol.
+
+ If unsure, say N.
diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
new file mode 100644
index 000000000000..5229dcda8297
--- /dev/null
+++ b/drivers/misc/ocxl/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+ccflags-$(CONFIG_PPC_WERROR) += -Werror
+
+ocxl-y += main.o pci.o config.o file.o pasid.o
+ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o
+obj-$(CONFIG_OCXL) += ocxl.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
+
+# ccflags-y += -DDEBUG
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
new file mode 100644
index 000000000000..e70cfa24577f
--- /dev/null
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+struct afu_irq {
+ int id;
+ int hw_irq;
+ unsigned int virq;
+ char *name;
+ u64 trigger_page;
+ struct eventfd_ctx *ev_ctx;
+};
+
+static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
+{
+ return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
+}
+
+static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)
+{
+ return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
+}
+
+static irqreturn_t afu_irq_handler(int virq, void *data)
+{
+ struct afu_irq *irq = (struct afu_irq *) data;
+
+ trace_ocxl_afu_irq_receive(virq);
+ if (irq->ev_ctx)
+ eventfd_signal(irq->ev_ctx, 1);
+ return IRQ_HANDLED;
+}
+
+static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
+{
+ int rc;
+
+ irq->virq = irq_create_mapping(NULL, irq->hw_irq);
+ if (!irq->virq) {
+ pr_err("irq_create_mapping failed\n");
+ return -ENOMEM;
+ }
+ pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq);
+
+ irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq);
+ if (!irq->name) {
+ irq_dispose_mapping(irq->virq);
+ return -ENOMEM;
+ }
+
+ rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq);
+ if (rc) {
+ kfree(irq->name);
+ irq->name = NULL;
+ irq_dispose_mapping(irq->virq);
+ pr_err("request_irq failed: %d\n", rc);
+ return rc;
+ }
+ return 0;
+}
+
+static void release_afu_irq(struct afu_irq *irq)
+{
+ free_irq(irq->virq, irq);
+ irq_dispose_mapping(irq->virq);
+ kfree(irq->name);
+}
+
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
+{
+ struct afu_irq *irq;
+ int rc;
+
+ irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL);
+ if (!irq)
+ return -ENOMEM;
+
+ /*
+ * We limit the number of afu irqs per context and per link to
+ * avoid a single process or user depleting the pool of IPIs
+ */
+
+ mutex_lock(&ctx->irq_lock);
+
+ irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT,
+ GFP_KERNEL);
+ if (irq->id < 0) {
+ rc = -ENOSPC;
+ goto err_unlock;
+ }
+
+ rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
+ &irq->trigger_page);
+ if (rc)
+ goto err_idr;
+
+ rc = setup_afu_irq(ctx, irq);
+ if (rc)
+ goto err_alloc;
+
+ *irq_offset = irq_id_to_offset(ctx, irq->id);
+
+ trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
+ *irq_offset);
+ mutex_unlock(&ctx->irq_lock);
+ return 0;
+
+err_alloc:
+ ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+err_idr:
+ idr_remove(&ctx->irq_idr, irq->id);
+err_unlock:
+ mutex_unlock(&ctx->irq_lock);
+ kfree(irq);
+ return rc;
+}
+
+static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
+{
+ trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
+ if (ctx->mapping)
+ unmap_mapping_range(ctx->mapping,
+ irq_id_to_offset(ctx, irq->id),
+ 1 << PAGE_SHIFT, 1);
+ release_afu_irq(irq);
+ if (irq->ev_ctx)
+ eventfd_ctx_put(irq->ev_ctx);
+ ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+ kfree(irq);
+}
+
+int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
+{
+ struct afu_irq *irq;
+ int id = irq_offset_to_id(ctx, irq_offset);
+
+ mutex_lock(&ctx->irq_lock);
+
+ irq = idr_find(&ctx->irq_idr, id);
+ if (!irq) {
+ mutex_unlock(&ctx->irq_lock);
+ return -EINVAL;
+ }
+ idr_remove(&ctx->irq_idr, irq->id);
+ afu_irq_free(irq, ctx);
+ mutex_unlock(&ctx->irq_lock);
+ return 0;
+}
+
+void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
+{
+ struct afu_irq *irq;
+ int id;
+
+ mutex_lock(&ctx->irq_lock);
+ idr_for_each_entry(&ctx->irq_idr, irq, id)
+ afu_irq_free(irq, ctx);
+ mutex_unlock(&ctx->irq_lock);
+}
+
+int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)
+{
+ struct afu_irq *irq;
+ struct eventfd_ctx *ev_ctx;
+ int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
+
+ mutex_lock(&ctx->irq_lock);
+ irq = idr_find(&ctx->irq_idr, id);
+ if (!irq) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+
+ ev_ctx = eventfd_ctx_fdget(eventfd);
+ if (IS_ERR(ev_ctx)) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+
+ irq->ev_ctx = ev_ctx;
+unlock:
+ mutex_unlock(&ctx->irq_lock);
+ return rc;
+}
+
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
+{
+ struct afu_irq *irq;
+ int id = irq_offset_to_id(ctx, irq_offset);
+ u64 addr = 0;
+
+ mutex_lock(&ctx->irq_lock);
+ irq = idr_find(&ctx->irq_idr, id);
+ if (irq)
+ addr = irq->trigger_page;
+ mutex_unlock(&ctx->irq_lock);
+ return addr;
+}
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644
index 000000000000..2e30de9c694a
--- /dev/null
+++ b/drivers/misc/ocxl/config.c
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/pci.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include <misc/ocxl-config.h>
+
+#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
+#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
+
+#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0)
+#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0)
+#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0)
+#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0)
+
+#define OCXL_DVSEC_TEMPL_VERSION 0x0
+#define OCXL_DVSEC_TEMPL_NAME 0x4
+#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28
+#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30
+#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38
+#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C
+#define OCXL_DVSEC_TEMPL_WWID 0x40
+
+#define OCXL_MAX_AFU_PER_FUNCTION 64
+#define OCXL_TEMPL_LEN 0x58
+#define OCXL_TEMPL_NAME_LEN 24
+#define OCXL_CFG_TIMEOUT 3
+
+static int find_dvsec(struct pci_dev *dev, int dvsec_id)
+{
+ int vsec = 0;
+ u16 vendor, id;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+ if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+ return vsec;
+ }
+ return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+ int vsec = 0;
+ u16 vendor, id;
+ u8 idx;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+
+ if (vendor == PCI_VENDOR_ID_IBM &&
+ id == OCXL_DVSEC_AFU_CTRL_ID) {
+ pci_read_config_byte(dev,
+ vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+ &idx);
+ if (idx == afu_idx)
+ return vsec;
+ }
+ }
+ return 0;
+}
+
+static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ u16 val;
+ int pos;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
+ if (!pos) {
+ /*
+ * PASID capability is not mandatory, but there
+ * shouldn't be any AFU
+ */
+ dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
+ fn->max_pasid_log = -1;
+ goto out;
+ }
+ pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
+ fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
+
+out:
+ dev_dbg(&dev->dev, "PASID capability:\n");
+ dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log);
+ return 0;
+}
+
+static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
+ if (!pos && PCI_FUNC(dev->devfn) == 0) {
+ dev_err(&dev->dev, "Can't find TL DVSEC\n");
+ return -ENODEV;
+ }
+ if (pos && PCI_FUNC(dev->devfn) != 0) {
+ dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
+ return -ENODEV;
+ }
+ fn->dvsec_tl_pos = pos;
+ return 0;
+}
+
+static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos, afu_present;
+ u32 val;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find function DVSEC\n");
+ return -ENODEV;
+ }
+ fn->dvsec_function_pos = pos;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+ afu_present = EXTRACT_BIT(val, 31);
+ if (!afu_present) {
+ fn->max_afu_index = -1;
+ dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
+ goto out;
+ }
+ fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
+
+out:
+ dev_dbg(&dev->dev, "Function DVSEC:\n");
+ dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index);
+ return 0;
+}
+
+static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos;
+
+ if (fn->max_afu_index < 0) {
+ fn->dvsec_afu_info_pos = -1;
+ return 0;
+ }
+
+ pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
+ return -ENODEV;
+ }
+ fn->dvsec_afu_info_pos = pos;
+ return 0;
+}
+
+static int read_dvsec_vendor(struct pci_dev *dev)
+{
+ int pos;
+ u32 cfg, tlx, dlx;
+
+ /*
+ * vendor specific DVSEC is optional
+ *
+ * It's currently only used on function 0 to specify the
+ * version of some logic blocks. Some older images may not
+ * even have it so we ignore any errors
+ */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return 0;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+ if (!pos)
+ return 0;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+
+ dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
+ dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg);
+ dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx);
+ dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx);
+ return 0;
+}
+
+static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
+ dev_err(&dev->dev,
+ "AFUs are defined but no PASIDs are requested\n");
+ return -EINVAL;
+ }
+
+ if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
+ dev_err(&dev->dev,
+ "Max AFU index out of architectural limit (%d vs %d)\n",
+ fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int rc;
+
+ rc = read_pasid(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_tl(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid Transaction Layer DVSEC configuration: %d\n",
+ rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_function(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid Function DVSEC configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_afu_info(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_vendor(dev);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid vendor specific DVSEC configuration: %d\n",
+ rc);
+ return -ENODEV;
+ }
+
+ rc = validate_function(dev, fn);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_function);
+
+static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ int offset, u32 *data)
+{
+ u32 val;
+ unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+ int pos = fn->dvsec_afu_info_pos;
+
+ /* Protect 'data valid' bit */
+ if (EXTRACT_BIT(offset, 31)) {
+ dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
+ return -EINVAL;
+ }
+
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+ while (!EXTRACT_BIT(val, 31)) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_err(&dev->dev,
+ "Timeout while reading AFU info DVSEC (offset=%d)\n",
+ offset);
+ return -EBUSY;
+ }
+ cpu_relax();
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+ }
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
+ return 0;
+}
+
+int ocxl_config_check_afu_index(struct pci_dev *dev,
+ struct ocxl_fn_config *fn, int afu_idx)
+{
+ u32 val;
+ int rc, templ_major, templ_minor, len;
+
+ pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
+ if (rc)
+ return rc;
+
+ /* AFU index map can have holes */
+ if (!val)
+ return 0;
+
+ templ_major = EXTRACT_BITS(val, 8, 15);
+ templ_minor = EXTRACT_BITS(val, 0, 7);
+ dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
+ templ_major, templ_minor);
+
+ len = EXTRACT_BITS(val, 16, 31);
+ if (len != OCXL_TEMPL_LEN) {
+ dev_warn(&dev->dev,
+ "Unexpected template length in AFU information (%#x)\n",
+ len);
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
+
+static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu)
+{
+ int i, rc;
+ u32 val, *ptr;
+
+ BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
+ for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
+ if (rc)
+ return rc;
+ ptr = (u32 *) &afu->name[i];
+ *ptr = val;
+ }
+ afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
+ return 0;
+}
+
+static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu)
+{
+ int rc;
+ u32 val;
+
+ /*
+ * Global MMIO
+ */
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
+ afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_offset += (u64) val << 32;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_size = val;
+
+ /*
+ * Per-process MMIO
+ */
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
+ afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_offset += (u64) val << 32;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_stride = val;
+
+ return 0;
+}
+
+static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+ int pos;
+ u8 val8;
+ u16 val16;
+
+ pos = find_dvsec_afu_ctrl(dev, afu->idx);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
+ afu->idx);
+ return -ENODEV;
+ }
+ afu->dvsec_afu_control_pos = pos;
+
+ pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
+ afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
+
+ pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
+ afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
+ return 0;
+}
+
+static bool char_allowed(int c)
+{
+ /*
+ * Permitted Characters : Alphanumeric, hyphen, underscore, comma
+ */
+ if ((c >= 0x30 && c <= 0x39) /* digits */ ||
+ (c >= 0x41 && c <= 0x5A) /* upper case */ ||
+ (c >= 0x61 && c <= 0x7A) /* lower case */ ||
+ c == 0 /* NULL */ ||
+ c == 0x2D /* - */ ||
+ c == 0x5F /* _ */ ||
+ c == 0x2C /* , */)
+ return true;
+ return false;
+}
+
+static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+ int i;
+
+ if (!afu->name[0]) {
+ dev_err(&dev->dev, "Empty AFU name\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
+ if (!char_allowed(afu->name[i])) {
+ dev_err(&dev->dev,
+ "Invalid character in AFU name\n");
+ return -EINVAL;
+ }
+ }
+
+ if (afu->global_mmio_bar != 0 &&
+ afu->global_mmio_bar != 2 &&
+ afu->global_mmio_bar != 4) {
+ dev_err(&dev->dev, "Invalid global MMIO bar number\n");
+ return -EINVAL;
+ }
+ if (afu->pp_mmio_bar != 0 &&
+ afu->pp_mmio_bar != 2 &&
+ afu->pp_mmio_bar != 4) {
+ dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu, u8 afu_idx)
+{
+ int rc;
+ u32 val32;
+
+ /*
+ * First, we need to write the AFU idx for the AFU we want to
+ * access.
+ */
+ WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
+ afu->idx = afu_idx;
+ pci_write_config_byte(dev,
+ fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+ afu->idx);
+
+ rc = read_afu_name(dev, fn, afu);
+ if (rc)
+ return rc;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
+ if (rc)
+ return rc;
+ afu->version_major = EXTRACT_BITS(val32, 24, 31);
+ afu->version_minor = EXTRACT_BITS(val32, 16, 23);
+ afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
+ afu->afum_type = EXTRACT_BITS(val32, 12, 13);
+ afu->profile = EXTRACT_BITS(val32, 0, 7);
+
+ rc = read_afu_mmio(dev, fn, afu);
+ if (rc)
+ return rc;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
+ if (rc)
+ return rc;
+ afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
+
+ rc = read_afu_control(dev, afu);
+ if (rc)
+ return rc;
+
+ dev_dbg(&dev->dev, "AFU configuration:\n");
+ dev_dbg(&dev->dev, " name = %s\n", afu->name);
+ dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major,
+ afu->version_minor);
+ dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar);
+ dev_dbg(&dev->dev, " global mmio offset = %#llx\n",
+ afu->global_mmio_offset);
+ dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size);
+ dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar);
+ dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset);
+ dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride);
+ dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size);
+ dev_dbg(&dev->dev, " pasid supported (log) = %u\n",
+ afu->pasid_supported_log);
+ dev_dbg(&dev->dev, " actag supported = %u\n",
+ afu->actag_supported);
+
+ rc = validate_afu(dev, afu);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_afu);
+
+int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported)
+{
+ int rc;
+
+ /*
+ * This is really a simple wrapper for the kernel API, to
+ * avoid an external driver using ocxl as a library to call
+ * platform-dependent code
+ */
+ rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
+ if (rc) {
+ dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info);
+
+void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
+ int actag_count)
+{
+ u16 val;
+
+ val = actag_count & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
+
+ val = actag_base & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag);
+
+int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
+{
+ return pnv_ocxl_get_pasid_count(dev, count);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
+
+void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
+ u32 pasid_count_log)
+{
+ u8 val8;
+ u32 val32;
+
+ val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+ &val32);
+ val32 &= ~OCXL_DVSEC_PASID_MASK;
+ val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+ val32);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid);
+
+void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
+{
+ u8 val;
+
+ pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
+ if (enable)
+ val |= 1;
+ else
+ val &= 0xFE;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
+
+int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+ u32 val;
+ __be32 *be32ptr;
+ u8 timers;
+ int i, rc;
+ long recv_cap;
+ char *recv_rate;
+
+ /*
+ * Skip on function != 0, as the TL can only be defined on 0
+ */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return 0;
+
+ recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+ if (!recv_rate)
+ return -ENOMEM;
+ /*
+ * The spec defines 64 templates for messages in the
+ * Transaction Layer (TL).
+ *
+ * The host and device each support a subset, so we need to
+ * configure the transmitters on each side to send only
+ * templates the receiver understands, at a rate the receiver
+ * can process. Per the spec, template 0 must be supported by
+ * everybody. That's the template which has been used by the
+ * host and device so far.
+ *
+ * The sending rate limit must be set before the template is
+ * enabled.
+ */
+
+ /*
+ * Device -> host
+ */
+ rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
+ PNV_OCXL_TL_RATE_BUF_SIZE);
+ if (rc)
+ goto out;
+
+ for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+ be32ptr = (__be32 *) &recv_rate[i];
+ pci_write_config_dword(dev,
+ tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+ be32_to_cpu(*be32ptr));
+ }
+ val = recv_cap >> 32;
+ pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+ val = recv_cap & GENMASK(31, 0);
+ pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
+
+ /*
+ * Host -> device
+ */
+ for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+ pci_read_config_dword(dev,
+ tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+ &val);
+ be32ptr = (__be32 *) &recv_rate[i];
+ *be32ptr = cpu_to_be32(val);
+ }
+ pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+ recv_cap = (long) val << 32;
+ pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
+ recv_cap |= val;
+
+ rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
+ PNV_OCXL_TL_RATE_BUF_SIZE);
+ if (rc)
+ goto out;
+
+ /*
+ * Opencapi commands needing to be retried are classified per
+ * the TL in 2 groups: short and long commands.
+ *
+ * The short back off timer it not used for now. It will be
+ * for opencapi 4.0.
+ *
+ * The long back off timer is typically used when an AFU hits
+ * a page fault but the NPU is already processing one. So the
+ * AFU needs to wait before it can resubmit. Having a value
+ * too low doesn't break anything, but can generate extra
+ * traffic on the link.
+ * We set it to 1.6 us for now. It's shorter than, but in the
+ * same order of magnitude as the time spent to process a page
+ * fault.
+ */
+ timers = 0x2 << 4; /* long timer = 1.6 us */
+ pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+ timers);
+
+ rc = 0;
+out:
+ kfree(recv_rate);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
+
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
+{
+ u32 val;
+ unsigned long timeout;
+
+ pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ if (EXTRACT_BIT(val, 20)) {
+ dev_err(&dev->dev,
+ "Can't terminate PASID %#x, previous termination didn't complete\n",
+ pasid);
+ return -EBUSY;
+ }
+
+ val &= ~OCXL_DVSEC_PASID_MASK;
+ val |= pasid & OCXL_DVSEC_PASID_MASK;
+ val |= BIT(20);
+ pci_write_config_dword(dev,
+ afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ val);
+
+ timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+ pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ while (EXTRACT_BIT(val, 20)) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_err(&dev->dev,
+ "Timeout while waiting for AFU to terminate PASID %#x\n",
+ pasid);
+ return -EBUSY;
+ }
+ cpu_relax();
+ pci_read_config_dword(dev,
+ afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid);
+
+void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
+ u32 tag_count)
+{
+ u32 val;
+
+ val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
+ val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
+ val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_actag);
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644
index 000000000000..909e8807824a
--- /dev/null
+++ b/drivers/misc/ocxl/context.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include "trace.h"
+#include "ocxl_internal.h"
+
+struct ocxl_context *ocxl_context_alloc(void)
+{
+ return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
+}
+
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+ struct address_space *mapping)
+{
+ int pasid;
+
+ ctx->afu = afu;
+ mutex_lock(&afu->contexts_lock);
+ pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
+ afu->pasid_base + afu->pasid_max, GFP_KERNEL);
+ if (pasid < 0) {
+ mutex_unlock(&afu->contexts_lock);
+ return pasid;
+ }
+ afu->pasid_count++;
+ mutex_unlock(&afu->contexts_lock);
+
+ ctx->pasid = pasid;
+ ctx->status = OPENED;
+ mutex_init(&ctx->status_mutex);
+ ctx->mapping = mapping;
+ mutex_init(&ctx->mapping_lock);
+ init_waitqueue_head(&ctx->events_wq);
+ mutex_init(&ctx->xsl_error_lock);
+ mutex_init(&ctx->irq_lock);
+ idr_init(&ctx->irq_idr);
+ /*
+ * Keep a reference on the AFU to make sure it's valid for the
+ * duration of the life of the context
+ */
+ ocxl_afu_get(afu);
+ return 0;
+}
+
+/*
+ * Callback for when a translation fault triggers an error
+ * data: a pointer to the context which triggered the fault
+ * addr: the address that triggered the error
+ * dsisr: the value of the PPC64 dsisr register
+ */
+static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
+{
+ struct ocxl_context *ctx = (struct ocxl_context *) data;
+
+ mutex_lock(&ctx->xsl_error_lock);
+ ctx->xsl_error.addr = addr;
+ ctx->xsl_error.dsisr = dsisr;
+ ctx->xsl_error.count++;
+ mutex_unlock(&ctx->xsl_error_lock);
+
+ wake_up_all(&ctx->events_wq);
+}
+
+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
+{
+ int rc;
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status != OPENED) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
+ current->mm->context.id, 0, amr, current->mm,
+ xsl_fault_error, ctx);
+ if (rc)
+ goto out;
+
+ ctx->status = ATTACHED;
+out:
+ mutex_unlock(&ctx->status_mutex);
+ return rc;
+}
+
+static int map_afu_irq(struct vm_area_struct *vma, unsigned long address,
+ u64 offset, struct ocxl_context *ctx)
+{
+ u64 trigger_addr;
+
+ trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);
+ if (!trigger_addr)
+ return VM_FAULT_SIGBUS;
+
+ vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT);
+ return VM_FAULT_NOPAGE;
+}
+
+static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
+ u64 offset, struct ocxl_context *ctx)
+{
+ u64 pp_mmio_addr;
+ int pasid_off;
+
+ if (offset >= ctx->afu->config.pp_mmio_stride)
+ return VM_FAULT_SIGBUS;
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status != ATTACHED) {
+ mutex_unlock(&ctx->status_mutex);
+ pr_debug("%s: Context not attached, failing mmio mmap\n",
+ __func__);
+ return VM_FAULT_SIGBUS;
+ }
+
+ pasid_off = ctx->pasid - ctx->afu->pasid_base;
+ pp_mmio_addr = ctx->afu->pp_mmio_start +
+ pasid_off * ctx->afu->config.pp_mmio_stride +
+ offset;
+
+ vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
+ mutex_unlock(&ctx->status_mutex);
+ return VM_FAULT_NOPAGE;
+}
+
+static int ocxl_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ocxl_context *ctx = vma->vm_file->private_data;
+ u64 offset;
+ int rc;
+
+ offset = vmf->pgoff << PAGE_SHIFT;
+ pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
+ ctx->pasid, vmf->address, offset);
+
+ if (offset < ctx->afu->irq_base_offset)
+ rc = map_pp_mmio(vma, vmf->address, offset, ctx);
+ else
+ rc = map_afu_irq(vma, vmf->address, offset, ctx);
+ return rc;
+}
+
+static const struct vm_operations_struct ocxl_vmops = {
+ .fault = ocxl_mmap_fault,
+};
+
+static int check_mmap_afu_irq(struct ocxl_context *ctx,
+ struct vm_area_struct *vma)
+{
+ /* only one page */
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
+
+ /* check offset validty */
+ if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT))
+ return -EINVAL;
+
+ /*
+ * trigger page should only be accessible in write mode.
+ *
+ * It's a bit theoretical, as a page mmaped with only
+ * PROT_WRITE is currently readable, but it doesn't hurt.
+ */
+ if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) ||
+ !(vma->vm_flags & VM_WRITE))
+ return -EINVAL;
+ vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC);
+ return 0;
+}
+
+static int check_mmap_mmio(struct ocxl_context *ctx,
+ struct vm_area_struct *vma)
+{
+ if ((vma_pages(vma) + vma->vm_pgoff) >
+ (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
+ return -EINVAL;
+ return 0;
+}
+
+int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
+{
+ int rc;
+
+ if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset)
+ rc = check_mmap_mmio(ctx, vma);
+ else
+ rc = check_mmap_afu_irq(ctx, vma);
+ if (rc)
+ return rc;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_ops = &ocxl_vmops;
+ return 0;
+}
+
+int ocxl_context_detach(struct ocxl_context *ctx)
+{
+ struct pci_dev *dev;
+ int afu_control_pos;
+ enum ocxl_context_status status;
+ int rc;
+
+ mutex_lock(&ctx->status_mutex);
+ status = ctx->status;
+ ctx->status = CLOSED;
+ mutex_unlock(&ctx->status_mutex);
+ if (status != ATTACHED)
+ return 0;
+
+ dev = to_pci_dev(ctx->afu->fn->dev.parent);
+ afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
+
+ mutex_lock(&ctx->afu->afu_control_lock);
+ rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
+ mutex_unlock(&ctx->afu->afu_control_lock);
+ trace_ocxl_terminate_pasid(ctx->pasid, rc);
+ if (rc) {
+ /*
+ * If we timeout waiting for the AFU to terminate the
+ * pasid, then it's dangerous to clean up the Process
+ * Element entry in the SPA, as it may be referenced
+ * in the future by the AFU. In which case, we would
+ * checkstop because of an invalid PE access (FIR
+ * register 2, bit 42). So leave the PE
+ * defined. Caller shouldn't free the context so that
+ * PASID remains allocated.
+ *
+ * A link reset will be required to cleanup the AFU
+ * and the SPA.
+ */
+ if (rc == -EBUSY)
+ return rc;
+ }
+ rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
+ if (rc) {
+ dev_warn(&ctx->afu->dev,
+ "Couldn't remove PE entry cleanly: %d\n", rc);
+ }
+ return 0;
+}
+
+void ocxl_context_detach_all(struct ocxl_afu *afu)
+{
+ struct ocxl_context *ctx;
+ int tmp;
+
+ mutex_lock(&afu->contexts_lock);
+ idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+ ocxl_context_detach(ctx);
+ /*
+ * We are force detaching - remove any active mmio
+ * mappings so userspace cannot interfere with the
+ * card if it comes back. Easiest way to exercise
+ * this is to unbind and rebind the driver via sysfs
+ * while it is in use.
+ */
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->mapping)
+ unmap_mapping_range(ctx->mapping, 0, 0, 1);
+ mutex_unlock(&ctx->mapping_lock);
+ }
+ mutex_unlock(&afu->contexts_lock);
+}
+
+void ocxl_context_free(struct ocxl_context *ctx)
+{
+ mutex_lock(&ctx->afu->contexts_lock);
+ ctx->afu->pasid_count--;
+ idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
+ mutex_unlock(&ctx->afu->contexts_lock);
+
+ ocxl_afu_irq_free_all(ctx);
+ idr_destroy(&ctx->irq_idr);
+ /* reference to the AFU taken in ocxl_context_init */
+ ocxl_afu_put(ctx->afu);
+ kfree(ctx);
+}
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644
index 000000000000..c90c1a578d2f
--- /dev/null
+++ b/drivers/misc/ocxl/file.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/ocxl.h>
+#include "ocxl_internal.h"
+
+
+#define OCXL_NUM_MINORS 256 /* Total to reserve */
+
+static dev_t ocxl_dev;
+static struct class *ocxl_class;
+static struct mutex minors_idr_lock;
+static struct idr minors_idr;
+
+static struct ocxl_afu *find_and_get_afu(dev_t devno)
+{
+ struct ocxl_afu *afu;
+ int afu_minor;
+
+ afu_minor = MINOR(devno);
+ /*
+ * We don't declare an RCU critical section here, as our AFU
+ * is protected by a reference counter on the device. By the time the
+ * minor number of a device is removed from the idr, the ref count of
+ * the device is already at 0, so no user API will access that AFU and
+ * this function can't return it.
+ */
+ afu = idr_find(&minors_idr, afu_minor);
+ if (afu)
+ ocxl_afu_get(afu);
+ return afu;
+}
+
+static int allocate_afu_minor(struct ocxl_afu *afu)
+{
+ int minor;
+
+ mutex_lock(&minors_idr_lock);
+ minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+ mutex_unlock(&minors_idr_lock);
+ return minor;
+}
+
+static void free_afu_minor(struct ocxl_afu *afu)
+{
+ mutex_lock(&minors_idr_lock);
+ idr_remove(&minors_idr, MINOR(afu->dev.devt));
+ mutex_unlock(&minors_idr_lock);
+}
+
+static int afu_open(struct inode *inode, struct file *file)
+{
+ struct ocxl_afu *afu;
+ struct ocxl_context *ctx;
+ int rc;
+
+ pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+
+ afu = find_and_get_afu(inode->i_rdev);
+ if (!afu)
+ return -ENODEV;
+
+ ctx = ocxl_context_alloc();
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_afu;
+ }
+
+ rc = ocxl_context_init(ctx, afu, inode->i_mapping);
+ if (rc)
+ goto put_afu;
+ file->private_data = ctx;
+ ocxl_afu_put(afu);
+ return 0;
+
+put_afu:
+ ocxl_afu_put(afu);
+ return rc;
+}
+
+static long afu_ioctl_attach(struct ocxl_context *ctx,
+ struct ocxl_ioctl_attach __user *uarg)
+{
+ struct ocxl_ioctl_attach arg;
+ u64 amr = 0;
+ int rc;
+
+ pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ /* Make sure reserved fields are not set for forward compatibility */
+ if (arg.reserved1 || arg.reserved2 || arg.reserved3)
+ return -EINVAL;
+
+ amr = arg.amr & mfspr(SPRN_UAMOR);
+ rc = ocxl_context_attach(ctx, amr);
+ return rc;
+}
+
+#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
+ x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \
+ x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \
+ x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \
+ "UNKNOWN")
+
+static long afu_ioctl(struct file *file, unsigned int cmd,
+ unsigned long args)
+{
+ struct ocxl_context *ctx = file->private_data;
+ struct ocxl_ioctl_irq_fd irq_fd;
+ u64 irq_offset;
+ long rc;
+
+ pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
+ CMD_STR(cmd));
+
+ if (ctx->status == CLOSED)
+ return -EIO;
+
+ switch (cmd) {
+ case OCXL_IOCTL_ATTACH:
+ rc = afu_ioctl_attach(ctx,
+ (struct ocxl_ioctl_attach __user *) args);
+ break;
+
+ case OCXL_IOCTL_IRQ_ALLOC:
+ rc = ocxl_afu_irq_alloc(ctx, &irq_offset);
+ if (!rc) {
+ rc = copy_to_user((u64 __user *) args, &irq_offset,
+ sizeof(irq_offset));
+ if (rc)
+ ocxl_afu_irq_free(ctx, irq_offset);
+ }
+ break;
+
+ case OCXL_IOCTL_IRQ_FREE:
+ rc = copy_from_user(&irq_offset, (u64 __user *) args,
+ sizeof(irq_offset));
+ if (rc)
+ return -EFAULT;
+ rc = ocxl_afu_irq_free(ctx, irq_offset);
+ break;
+
+ case OCXL_IOCTL_IRQ_SET_FD:
+ rc = copy_from_user(&irq_fd, (u64 __user *) args,
+ sizeof(irq_fd));
+ if (rc)
+ return -EFAULT;
+ if (irq_fd.reserved)
+ return -EINVAL;
+ rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset,
+ irq_fd.eventfd);
+ break;
+
+ default:
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long args)
+{
+ return afu_ioctl(file, cmd, args);
+}
+
+static int afu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct ocxl_context *ctx = file->private_data;
+
+ pr_debug("%s for context %d\n", __func__, ctx->pasid);
+ return ocxl_context_mmap(ctx, vma);
+}
+
+static bool has_xsl_error(struct ocxl_context *ctx)
+{
+ bool ret;
+
+ mutex_lock(&ctx->xsl_error_lock);
+ ret = !!ctx->xsl_error.addr;
+ mutex_unlock(&ctx->xsl_error_lock);
+
+ return ret;
+}
+
+/*
+ * Are there any events pending on the AFU
+ * ctx: The AFU context
+ * Returns: true if there are events pending
+ */
+static bool afu_events_pending(struct ocxl_context *ctx)
+{
+ if (has_xsl_error(ctx))
+ return true;
+ return false;
+}
+
+static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
+{
+ struct ocxl_context *ctx = file->private_data;
+ unsigned int mask = 0;
+ bool closed;
+
+ pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+ poll_wait(file, &ctx->events_wq, wait);
+
+ mutex_lock(&ctx->status_mutex);
+ closed = (ctx->status == CLOSED);
+ mutex_unlock(&ctx->status_mutex);
+
+ if (afu_events_pending(ctx))
+ mask = POLLIN | POLLRDNORM;
+ else if (closed)
+ mask = POLLERR;
+
+ return mask;
+}
+
+/*
+ * Populate the supplied buffer with a single XSL error
+ * ctx: The AFU context to report the error from
+ * header: the event header to populate
+ * buf: The buffer to write the body into (should be at least
+ * AFU_EVENT_BODY_XSL_ERROR_SIZE)
+ * Return: the amount of buffer that was populated
+ */
+static ssize_t append_xsl_error(struct ocxl_context *ctx,
+ struct ocxl_kernel_event_header *header,
+ char __user *buf)
+{
+ struct ocxl_kernel_event_xsl_fault_error body;
+
+ memset(&body, 0, sizeof(body));
+
+ mutex_lock(&ctx->xsl_error_lock);
+ if (!ctx->xsl_error.addr) {
+ mutex_unlock(&ctx->xsl_error_lock);
+ return 0;
+ }
+
+ body.addr = ctx->xsl_error.addr;
+ body.dsisr = ctx->xsl_error.dsisr;
+ body.count = ctx->xsl_error.count;
+
+ ctx->xsl_error.addr = 0;
+ ctx->xsl_error.dsisr = 0;
+ ctx->xsl_error.count = 0;
+
+ mutex_unlock(&ctx->xsl_error_lock);
+
+ header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
+
+ if (copy_to_user(buf, &body, sizeof(body)))
+ return -EFAULT;
+
+ return sizeof(body);
+}
+
+#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
+
+/*
+ * Reports events on the AFU
+ * Format:
+ * Header (struct ocxl_kernel_event_header)
+ * Body (struct ocxl_kernel_event_*)
+ * Header...
+ */
+static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+ loff_t *off)
+{
+ struct ocxl_context *ctx = file->private_data;
+ struct ocxl_kernel_event_header header;
+ ssize_t rc;
+ size_t used = 0;
+ DEFINE_WAIT(event_wait);
+
+ memset(&header, 0, sizeof(header));
+
+ /* Require offset to be 0 */
+ if (*off != 0)
+ return -EINVAL;
+
+ if (count < (sizeof(struct ocxl_kernel_event_header) +
+ AFU_EVENT_BODY_MAX_SIZE))
+ return -EINVAL;
+
+ for (;;) {
+ prepare_to_wait(&ctx->events_wq, &event_wait,
+ TASK_INTERRUPTIBLE);
+
+ if (afu_events_pending(ctx))
+ break;
+
+ if (ctx->status == CLOSED)
+ break;
+
+ if (file->f_flags & O_NONBLOCK) {
+ finish_wait(&ctx->events_wq, &event_wait);
+ return -EAGAIN;
+ }
+
+ if (signal_pending(current)) {
+ finish_wait(&ctx->events_wq, &event_wait);
+ return -ERESTARTSYS;
+ }
+
+ schedule();
+ }
+
+ finish_wait(&ctx->events_wq, &event_wait);
+
+ if (has_xsl_error(ctx)) {
+ used = append_xsl_error(ctx, &header, buf + sizeof(header));
+ if (used < 0)
+ return used;
+ }
+
+ if (!afu_events_pending(ctx))
+ header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
+
+ if (copy_to_user(buf, &header, sizeof(header)))
+ return -EFAULT;
+
+ used += sizeof(header);
+
+ rc = (ssize_t) used;
+ return rc;
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+ struct ocxl_context *ctx = file->private_data;
+ int rc;
+
+ pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+ rc = ocxl_context_detach(ctx);
+ mutex_lock(&ctx->mapping_lock);
+ ctx->mapping = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+ wake_up_all(&ctx->events_wq);
+ if (rc != -EBUSY)
+ ocxl_context_free(ctx);
+ return 0;
+}
+
+static const struct file_operations ocxl_afu_fops = {
+ .owner = THIS_MODULE,
+ .open = afu_open,
+ .unlocked_ioctl = afu_ioctl,
+ .compat_ioctl = afu_compat_ioctl,
+ .mmap = afu_mmap,
+ .poll = afu_poll,
+ .read = afu_read,
+ .release = afu_release,
+};
+
+int ocxl_create_cdev(struct ocxl_afu *afu)
+{
+ int rc;
+
+ cdev_init(&afu->cdev, &ocxl_afu_fops);
+ rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+ if (rc) {
+ dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+ return rc;
+ }
+ return 0;
+}
+
+void ocxl_destroy_cdev(struct ocxl_afu *afu)
+{
+ cdev_del(&afu->cdev);
+}
+
+int ocxl_register_afu(struct ocxl_afu *afu)
+{
+ int minor;
+
+ minor = allocate_afu_minor(afu);
+ if (minor < 0)
+ return minor;
+ afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
+ afu->dev.class = ocxl_class;
+ return device_register(&afu->dev);
+}
+
+void ocxl_unregister_afu(struct ocxl_afu *afu)
+{
+ free_afu_minor(afu);
+}
+
+static char *ocxl_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
+}
+
+int ocxl_file_init(void)
+{
+ int rc;
+
+ mutex_init(&minors_idr_lock);
+ idr_init(&minors_idr);
+
+ rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
+ if (rc) {
+ pr_err("Unable to allocate ocxl major number: %d\n", rc);
+ return rc;
+ }
+
+ ocxl_class = class_create(THIS_MODULE, "ocxl");
+ if (IS_ERR(ocxl_class)) {
+ pr_err("Unable to create ocxl class\n");
+ unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+ return PTR_ERR(ocxl_class);
+ }
+
+ ocxl_class->devnode = ocxl_devnode;
+ return 0;
+}
+
+void ocxl_file_exit(void)
+{
+ class_destroy(ocxl_class);
+ unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+ idr_destroy(&minors_idr);
+}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644
index 000000000000..f30790582dc0
--- /dev/null
+++ b/drivers/misc/ocxl/link.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include <linux/mutex.h>
+#include <linux/mmu_context.h>
+#include <asm/copro.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+
+#define SPA_PASID_BITS 15
+#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1)
+#define SPA_PE_MASK SPA_PASID_MAX
+#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */
+
+#define SPA_CFG_SF (1ull << (63-0))
+#define SPA_CFG_TA (1ull << (63-1))
+#define SPA_CFG_HV (1ull << (63-3))
+#define SPA_CFG_UV (1ull << (63-4))
+#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */
+#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */
+#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */
+#define SPA_CFG_PR (1ull << (63-49))
+#define SPA_CFG_TC (1ull << (63-54))
+#define SPA_CFG_DR (1ull << (63-59))
+
+#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */
+#define SPA_XSL_S (1ull << (63-38)) /* Store operation */
+
+#define SPA_PE_VALID 0x80000000
+
+
+struct pe_data {
+ struct mm_struct *mm;
+ /* callback to trigger when a translation fault occurs */
+ void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
+ /* opaque pointer to be passed to the above callback */
+ void *xsl_err_data;
+ struct rcu_head rcu;
+};
+
+struct spa {
+ struct ocxl_process_element *spa_mem;
+ int spa_order;
+ struct mutex spa_lock;
+ struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
+ char *irq_name;
+ int virq;
+ void __iomem *reg_dsisr;
+ void __iomem *reg_dar;
+ void __iomem *reg_tfc;
+ void __iomem *reg_pe_handle;
+ /*
+ * The following field are used by the memory fault
+ * interrupt handler. We can only have one interrupt at a
+ * time. The NPU won't raise another interrupt until the
+ * previous one has been ack'd by writing to the TFC register
+ */
+ struct xsl_fault {
+ struct work_struct fault_work;
+ u64 pe;
+ u64 dsisr;
+ u64 dar;
+ struct pe_data pe_data;
+ } xsl_fault;
+};
+
+/*
+ * A opencapi link can be used be by several PCI functions. We have
+ * one link per device slot.
+ *
+ * A linked list of opencapi links should suffice, as there's a
+ * limited number of opencapi slots on a system and lookup is only
+ * done when the device is probed
+ */
+struct link {
+ struct list_head list;
+ struct kref ref;
+ int domain;
+ int bus;
+ int dev;
+ atomic_t irq_available;
+ struct spa *spa;
+ void *platform_data;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+enum xsl_response {
+ CONTINUE,
+ ADDRESS_ERROR,
+ RESTART,
+};
+
+
+static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
+{
+ u64 reg;
+
+ *dsisr = in_be64(spa->reg_dsisr);
+ *dar = in_be64(spa->reg_dar);
+ reg = in_be64(spa->reg_pe_handle);
+ *pe = reg & SPA_PE_MASK;
+}
+
+static void ack_irq(struct spa *spa, enum xsl_response r)
+{
+ u64 reg = 0;
+
+ /* continue is not supported */
+ if (r == RESTART)
+ reg = PPC_BIT(31);
+ else if (r == ADDRESS_ERROR)
+ reg = PPC_BIT(30);
+ else
+ WARN(1, "Invalid irq response %d\n", r);
+
+ if (reg) {
+ trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
+ spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
+ out_be64(spa->reg_tfc, reg);
+ }
+}
+
+static void xsl_fault_handler_bh(struct work_struct *fault_work)
+{
+ unsigned int flt = 0;
+ unsigned long access, flags, inv_flags = 0;
+ enum xsl_response r;
+ struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
+ fault_work);
+ struct spa *spa = container_of(fault, struct spa, xsl_fault);
+
+ int rc;
+
+ /*
+ * We need to release a reference on the mm whenever exiting this
+ * function (taken in the memory fault interrupt handler)
+ */
+ rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
+ &flt);
+ if (rc) {
+ pr_debug("copro_handle_mm_fault failed: %d\n", rc);
+ if (fault->pe_data.xsl_err_cb) {
+ fault->pe_data.xsl_err_cb(
+ fault->pe_data.xsl_err_data,
+ fault->dar, fault->dsisr);
+ }
+ r = ADDRESS_ERROR;
+ goto ack;
+ }
+
+ if (!radix_enabled()) {
+ /*
+ * update_mmu_cache() will not have loaded the hash
+ * since current->trap is not a 0x400 or 0x300, so
+ * just call hash_page_mm() here.
+ */
+ access = _PAGE_PRESENT | _PAGE_READ;
+ if (fault->dsisr & SPA_XSL_S)
+ access |= _PAGE_WRITE;
+
+ if (REGION_ID(fault->dar) != USER_REGION_ID)
+ access |= _PAGE_PRIVILEGED;
+
+ local_irq_save(flags);
+ hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
+ inv_flags);
+ local_irq_restore(flags);
+ }
+ r = RESTART;
+ack:
+ mmdrop(fault->pe_data.mm);
+ ack_irq(spa, r);
+}
+
+static irqreturn_t xsl_fault_handler(int irq, void *data)
+{
+ struct link *link = (struct link *) data;
+ struct spa *spa = link->spa;
+ u64 dsisr, dar, pe_handle;
+ struct pe_data *pe_data;
+ struct ocxl_process_element *pe;
+ int lpid, pid, tid;
+
+ read_irq(spa, &dsisr, &dar, &pe_handle);
+ trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);
+
+ WARN_ON(pe_handle > SPA_PE_MASK);
+ pe = spa->spa_mem + pe_handle;
+ lpid = be32_to_cpu(pe->lpid);
+ pid = be32_to_cpu(pe->pid);
+ tid = be32_to_cpu(pe->tid);
+ /* We could be reading all null values here if the PE is being
+ * removed while an interrupt kicks in. It's not supposed to
+ * happen if the driver notified the AFU to terminate the
+ * PASID, and the AFU waited for pending operations before
+ * acknowledging. But even if it happens, we won't find a
+ * memory context below and fail silently, so it should be ok.
+ */
+ if (!(dsisr & SPA_XSL_TF)) {
+ WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
+ ack_irq(spa, ADDRESS_ERROR);
+ return IRQ_HANDLED;
+ }
+
+ rcu_read_lock();
+ pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
+ if (!pe_data) {
+ /*
+ * Could only happen if the driver didn't notify the
+ * AFU about PASID termination before removing the PE,
+ * or the AFU didn't wait for all memory access to
+ * have completed.
+ *
+ * Either way, we fail early, but we shouldn't log an
+ * error message, as it is a valid (if unexpected)
+ * scenario
+ */
+ rcu_read_unlock();
+ pr_debug("Unknown mm context for xsl interrupt\n");
+ ack_irq(spa, ADDRESS_ERROR);
+ return IRQ_HANDLED;
+ }
+ WARN_ON(pe_data->mm->context.id != pid);
+
+ spa->xsl_fault.pe = pe_handle;
+ spa->xsl_fault.dar = dar;
+ spa->xsl_fault.dsisr = dsisr;
+ spa->xsl_fault.pe_data = *pe_data;
+ mmgrab(pe_data->mm); /* mm count is released by bottom half */
+
+ rcu_read_unlock();
+ schedule_work(&spa->xsl_fault.fault_work);
+ return IRQ_HANDLED;
+}
+
+static void unmap_irq_registers(struct spa *spa)
+{
+ pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
+ spa->reg_pe_handle);
+}
+
+static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
+{
+ return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
+ &spa->reg_tfc, &spa->reg_pe_handle);
+}
+
+static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
+{
+ struct spa *spa = link->spa;
+ int rc;
+ int hwirq;
+
+ rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
+ if (rc)
+ return rc;
+
+ rc = map_irq_registers(dev, spa);
+ if (rc)
+ return rc;
+
+ spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
+ link->domain, link->bus, link->dev);
+ if (!spa->irq_name) {
+ unmap_irq_registers(spa);
+ dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
+ return -ENOMEM;
+ }
+ /*
+ * At some point, we'll need to look into allowing a higher
+ * number of interrupts. Could we have an IRQ domain per link?
+ */
+ spa->virq = irq_create_mapping(NULL, hwirq);
+ if (!spa->virq) {
+ kfree(spa->irq_name);
+ unmap_irq_registers(spa);
+ dev_err(&dev->dev,
+ "irq_create_mapping failed for translation interrupt\n");
+ return -EINVAL;
+ }
+
+ dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
+
+ rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
+ link);
+ if (rc) {
+ irq_dispose_mapping(spa->virq);
+ kfree(spa->irq_name);
+ unmap_irq_registers(spa);
+ dev_err(&dev->dev,
+ "request_irq failed for translation interrupt: %d\n",
+ rc);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void release_xsl_irq(struct link *link)
+{
+ struct spa *spa = link->spa;
+
+ if (spa->virq) {
+ free_irq(spa->virq, link);
+ irq_dispose_mapping(spa->virq);
+ }
+ kfree(spa->irq_name);
+ unmap_irq_registers(spa);
+}
+
+static int alloc_spa(struct pci_dev *dev, struct link *link)
+{
+ struct spa *spa;
+
+ spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
+ if (!spa)
+ return -ENOMEM;
+
+ mutex_init(&spa->spa_lock);
+ INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
+ INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
+
+ spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
+ spa->spa_mem = (struct ocxl_process_element *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
+ if (!spa->spa_mem) {
+ dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
+ kfree(spa);
+ return -ENOMEM;
+ }
+ pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
+ link->dev, spa->spa_mem);
+
+ link->spa = spa;
+ return 0;
+}
+
+static void free_spa(struct link *link)
+{
+ struct spa *spa = link->spa;
+
+ pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
+ link->dev);
+
+ if (spa && spa->spa_mem) {
+ free_pages((unsigned long) spa->spa_mem, spa->spa_order);
+ kfree(spa);
+ link->spa = NULL;
+ }
+}
+
+static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
+{
+ struct link *link;
+ int rc;
+
+ link = kzalloc(sizeof(struct link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ kref_init(&link->ref);
+ link->domain = pci_domain_nr(dev->bus);
+ link->bus = dev->bus->number;
+ link->dev = PCI_SLOT(dev->devfn);
+ atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
+
+ rc = alloc_spa(dev, link);
+ if (rc)
+ goto err_free;
+
+ rc = setup_xsl_irq(dev, link);
+ if (rc)
+ goto err_spa;
+
+ /* platform specific hook */
+ rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
+ &link->platform_data);
+ if (rc)
+ goto err_xsl_irq;
+
+ *out_link = link;
+ return 0;
+
+err_xsl_irq:
+ release_xsl_irq(link);
+err_spa:
+ free_spa(link);
+err_free:
+ kfree(link);
+ return rc;
+}
+
+static void free_link(struct link *link)
+{
+ release_xsl_irq(link);
+ free_spa(link);
+ kfree(link);
+}
+
+int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
+{
+ int rc = 0;
+ struct link *link;
+
+ mutex_lock(&links_list_lock);
+ list_for_each_entry(link, &links_list, list) {
+ /* The functions of a device all share the same link */
+ if (link->domain == pci_domain_nr(dev->bus) &&
+ link->bus == dev->bus->number &&
+ link->dev == PCI_SLOT(dev->devfn)) {
+ kref_get(&link->ref);
+ *link_handle = link;
+ goto unlock;
+ }
+ }
+ rc = alloc_link(dev, PE_mask, &link);
+ if (rc)
+ goto unlock;
+
+ list_add(&link->list, &links_list);
+ *link_handle = link;
+unlock:
+ mutex_unlock(&links_list_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_setup);
+
+static void release_xsl(struct kref *ref)
+{
+ struct link *link = container_of(ref, struct link, ref);
+
+ list_del(&link->list);
+ /* call platform code before releasing data */
+ pnv_ocxl_spa_release(link->platform_data);
+ free_link(link);
+}
+
+void ocxl_link_release(struct pci_dev *dev, void *link_handle)
+{
+ struct link *link = (struct link *) link_handle;
+
+ mutex_lock(&links_list_lock);
+ kref_put(&link->ref, release_xsl);
+ mutex_unlock(&links_list_lock);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_release);
+
+static u64 calculate_cfg_state(bool kernel)
+{
+ u64 state;
+
+ state = SPA_CFG_DR;
+ if (mfspr(SPRN_LPCR) & LPCR_TC)
+ state |= SPA_CFG_TC;
+ if (radix_enabled())
+ state |= SPA_CFG_XLAT_ror;
+ else
+ state |= SPA_CFG_XLAT_hpt;
+ state |= SPA_CFG_HV;
+ if (kernel) {
+ if (mfmsr() & MSR_SF)
+ state |= SPA_CFG_SF;
+ } else {
+ state |= SPA_CFG_PR;
+ if (!test_tsk_thread_flag(current, TIF_32BIT))
+ state |= SPA_CFG_SF;
+ }
+ return state;
+}
+
+int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+ u64 amr, struct mm_struct *mm,
+ void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+ void *xsl_err_data)
+{
+ struct link *link = (struct link *) link_handle;
+ struct spa *spa = link->spa;
+ struct ocxl_process_element *pe;
+ int pe_handle, rc = 0;
+ struct pe_data *pe_data;
+
+ BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
+ if (pasid > SPA_PASID_MAX)
+ return -EINVAL;
+
+ mutex_lock(&spa->spa_lock);
+ pe_handle = pasid & SPA_PE_MASK;
+ pe = spa->spa_mem + pe_handle;
+
+ if (pe->software_state) {
+ rc = -EBUSY;
+ goto unlock;
+ }
+
+ pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
+ if (!pe_data) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ pe_data->mm = mm;
+ pe_data->xsl_err_cb = xsl_err_cb;
+ pe_data->xsl_err_data = xsl_err_data;
+
+ memset(pe, 0, sizeof(struct ocxl_process_element));
+ pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+ pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+ pe->pid = cpu_to_be32(pidr);
+ pe->tid = cpu_to_be32(tidr);
+ pe->amr = cpu_to_be64(amr);
+ pe->software_state = cpu_to_be32(SPA_PE_VALID);
+
+ mm_context_add_copro(mm);
+ /*
+ * Barrier is to make sure PE is visible in the SPA before it
+ * is used by the device. It also helps with the global TLBI
+ * invalidation
+ */
+ mb();
+ radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
+
+ /*
+ * The mm must stay valid for as long as the device uses it. We
+ * lower the count when the context is removed from the SPA.
+ *
+ * We grab mm_count (and not mm_users), as we don't want to
+ * end up in a circular dependency if a process mmaps its
+ * mmio, therefore incrementing the file ref count when
+ * calling mmap(), and forgets to unmap before exiting. In
+ * that scenario, when the kernel handles the death of the
+ * process, the file is not cleaned because unmap was not
+ * called, and the mm wouldn't be freed because we would still
+ * have a reference on mm_users. Incrementing mm_count solves
+ * the problem.
+ */
+ mmgrab(mm);
+ trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
+unlock:
+ mutex_unlock(&spa->spa_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
+
+int ocxl_link_remove_pe(void *link_handle, int pasid)
+{
+ struct link *link = (struct link *) link_handle;
+ struct spa *spa = link->spa;
+ struct ocxl_process_element *pe;
+ struct pe_data *pe_data;
+ int pe_handle, rc;
+
+ if (pasid > SPA_PASID_MAX)
+ return -EINVAL;
+
+ /*
+ * About synchronization with our memory fault handler:
+ *
+ * Before removing the PE, the driver is supposed to have
+ * notified the AFU, which should have cleaned up and make
+ * sure the PASID is no longer in use, including pending
+ * interrupts. However, there's no way to be sure...
+ *
+ * We clear the PE and remove the context from our radix
+ * tree. From that point on, any new interrupt for that
+ * context will fail silently, which is ok. As mentioned
+ * above, that's not expected, but it could happen if the
+ * driver or AFU didn't do the right thing.
+ *
+ * There could still be a bottom half running, but we don't
+ * need to wait/flush, as it is managing a reference count on
+ * the mm it reads from the radix tree.
+ */
+ pe_handle = pasid & SPA_PE_MASK;
+ pe = spa->spa_mem + pe_handle;
+
+ mutex_lock(&spa->spa_lock);
+
+ if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+
+ trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
+ be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));
+
+ memset(pe, 0, sizeof(struct ocxl_process_element));
+ /*
+ * The barrier makes sure the PE is removed from the SPA
+ * before we clear the NPU context cache below, so that the
+ * old PE cannot be reloaded erroneously.
+ */
+ mb();
+
+ /*
+ * hook to platform code
+ * On powerpc, the entry needs to be cleared from the context
+ * cache of the NPU.
+ */
+ rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
+ WARN_ON(rc);
+
+ pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
+ if (!pe_data) {
+ WARN(1, "Couldn't find pe data when removing PE\n");
+ } else {
+ mm_context_remove_copro(pe_data->mm);
+ mmdrop(pe_data->mm);
+ kfree_rcu(pe_data, rcu);
+ }
+unlock:
+ mutex_unlock(&spa->spa_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
+
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
+{
+ struct link *link = (struct link *) link_handle;
+ int rc, irq;
+ u64 addr;
+
+ if (atomic_dec_if_positive(&link->irq_available) < 0)
+ return -ENOSPC;
+
+ rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
+ if (rc) {
+ atomic_inc(&link->irq_available);
+ return rc;
+ }
+
+ *hw_irq = irq;
+ *trigger_addr = addr;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
+
+void ocxl_link_free_irq(void *link_handle, int hw_irq)
+{
+ struct link *link = (struct link *) link_handle;
+
+ pnv_ocxl_free_xive_irq(hw_irq);
+ atomic_inc(&link->irq_available);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_free_irq);
diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
new file mode 100644
index 000000000000..7210d9e059be
--- /dev/null
+++ b/drivers/misc/ocxl/main.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "ocxl_internal.h"
+
+static int __init init_ocxl(void)
+{
+ int rc = 0;
+
+ rc = ocxl_file_init();
+ if (rc)
+ return rc;
+
+ rc = pci_register_driver(&ocxl_pci_driver);
+ if (rc) {
+ ocxl_file_exit();
+ return rc;
+ }
+ return 0;
+}
+
+static void exit_ocxl(void)
+{
+ pci_unregister_driver(&ocxl_pci_driver);
+ ocxl_file_exit();
+}
+
+module_init(init_ocxl);
+module_exit(exit_ocxl);
+
+MODULE_DESCRIPTION("Open Coherent Accelerator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
new file mode 100644
index 000000000000..5d421824afd9
--- /dev/null
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_INTERNAL_H_
+#define _OCXL_INTERNAL_H_
+
+#include <linux/pci.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <misc/ocxl.h>
+
+#define MAX_IRQ_PER_LINK 2000
+#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK
+
+#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
+#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
+
+extern struct pci_driver ocxl_pci_driver;
+
+
+struct ocxl_fn {
+ struct device dev;
+ int bar_used[3];
+ struct ocxl_fn_config config;
+ struct list_head afu_list;
+ int pasid_base;
+ int actag_base;
+ int actag_enabled;
+ int actag_supported;
+ struct list_head pasid_list;
+ struct list_head actag_list;
+ void *link;
+};
+
+struct ocxl_afu {
+ struct ocxl_fn *fn;
+ struct list_head list;
+ struct device dev;
+ struct cdev cdev;
+ struct ocxl_afu_config config;
+ int pasid_base;
+ int pasid_count; /* opened contexts */
+ int pasid_max; /* maximum number of contexts */
+ int actag_base;
+ int actag_enabled;
+ struct mutex contexts_lock;
+ struct idr contexts_idr;
+ struct mutex afu_control_lock;
+ u64 global_mmio_start;
+ u64 irq_base_offset;
+ void __iomem *global_mmio_ptr;
+ u64 pp_mmio_start;
+ struct bin_attribute attr_global_mmio;
+};
+
+enum ocxl_context_status {
+ CLOSED,
+ OPENED,
+ ATTACHED,
+};
+
+// Contains metadata about a translation fault
+struct ocxl_xsl_error {
+ u64 addr; // The address that triggered the fault
+ u64 dsisr; // the value of the dsisr register
+ u64 count; // The number of times this fault has been triggered
+};
+
+struct ocxl_context {
+ struct ocxl_afu *afu;
+ int pasid;
+ struct mutex status_mutex;
+ enum ocxl_context_status status;
+ struct address_space *mapping;
+ struct mutex mapping_lock;
+ wait_queue_head_t events_wq;
+ struct mutex xsl_error_lock;
+ struct ocxl_xsl_error xsl_error;
+ struct mutex irq_lock;
+ struct idr irq_idr;
+};
+
+struct ocxl_process_element {
+ __be64 config_state;
+ __be32 reserved1[11];
+ __be32 lpid;
+ __be32 tid;
+ __be32 pid;
+ __be32 reserved2[10];
+ __be64 amr;
+ __be32 reserved3[3];
+ __be32 software_state;
+};
+
+
+extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
+extern void ocxl_afu_put(struct ocxl_afu *afu);
+
+extern int ocxl_create_cdev(struct ocxl_afu *afu);
+extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
+extern int ocxl_register_afu(struct ocxl_afu *afu);
+extern void ocxl_unregister_afu(struct ocxl_afu *afu);
+
+extern int ocxl_file_init(void);
+extern void ocxl_file_exit(void);
+
+extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+
+extern struct ocxl_context *ocxl_context_alloc(void);
+extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+ struct address_space *mapping);
+extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
+extern int ocxl_context_mmap(struct ocxl_context *ctx,
+ struct vm_area_struct *vma);
+extern int ocxl_context_detach(struct ocxl_context *ctx);
+extern void ocxl_context_detach_all(struct ocxl_afu *afu);
+extern void ocxl_context_free(struct ocxl_context *ctx);
+
+extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
+extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
+
+extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
+extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
+extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
+extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
+ int eventfd);
+extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
+
+#endif /* _OCXL_INTERNAL_H_ */
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
new file mode 100644
index 000000000000..d14cb56e6920
--- /dev/null
+++ b/drivers/misc/ocxl/pasid.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include "ocxl_internal.h"
+
+
+struct id_range {
+ struct list_head list;
+ u32 start;
+ u32 end;
+};
+
+#ifdef DEBUG
+static void dump_list(struct list_head *head, char *type_str)
+{
+ struct id_range *cur;
+
+ pr_debug("%s ranges allocated:\n", type_str);
+ list_for_each_entry(cur, head, list) {
+ pr_debug("Range %d->%d\n", cur->start, cur->end);
+ }
+}
+#endif
+
+static int range_alloc(struct list_head *head, u32 size, int max_id,
+ char *type_str)
+{
+ struct list_head *pos;
+ struct id_range *cur, *new;
+ int rc, last_end;
+
+ new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ pos = head;
+ last_end = -1;
+ list_for_each_entry(cur, head, list) {
+ if ((cur->start - last_end) > size)
+ break;
+ last_end = cur->end;
+ pos = &cur->list;
+ }
+
+ new->start = last_end + 1;
+ new->end = new->start + size - 1;
+
+ if (new->end > max_id) {
+ kfree(new);
+ rc = -ENOSPC;
+ } else {
+ list_add(&new->list, pos);
+ rc = new->start;
+ }
+
+#ifdef DEBUG
+ dump_list(head, type_str);
+#endif
+ return rc;
+}
+
+static void range_free(struct list_head *head, u32 start, u32 size,
+ char *type_str)
+{
+ bool found = false;
+ struct id_range *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, head, list) {
+ if (cur->start == start && cur->end == (start + size - 1)) {
+ found = true;
+ list_del(&cur->list);
+ kfree(cur);
+ break;
+ }
+ }
+ WARN_ON(!found);
+#ifdef DEBUG
+ dump_list(head, type_str);
+#endif
+}
+
+int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+ int max_pasid;
+
+ if (fn->config.max_pasid_log < 0)
+ return -ENOSPC;
+ max_pasid = 1 << fn->config.max_pasid_log;
+ return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
+}
+
+void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+ return range_free(&fn->pasid_list, start, size, "afu pasid");
+}
+
+int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+ int max_actag;
+
+ max_actag = fn->actag_enabled;
+ return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
+}
+
+void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+ return range_free(&fn->actag_list, start, size, "afu actag");
+}
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
new file mode 100644
index 000000000000..0051d9ec76cc
--- /dev/null
+++ b/drivers/misc/ocxl/pci.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+
+/*
+ * Any opencapi device which wants to use this 'generic' driver should
+ * use the 0x062B device ID. Vendors should define the subsystem
+ * vendor/device ID to help differentiate devices.
+ */
+static const struct pci_device_id ocxl_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
+
+
+static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
+{
+ return (get_device(&fn->dev) == NULL) ? NULL : fn;
+}
+
+static void ocxl_fn_put(struct ocxl_fn *fn)
+{
+ put_device(&fn->dev);
+}
+
+struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
+{
+ return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+void ocxl_afu_put(struct ocxl_afu *afu)
+{
+ put_device(&afu->dev);
+}
+
+static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
+{
+ struct ocxl_afu *afu;
+
+ afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
+ if (!afu)
+ return NULL;
+
+ mutex_init(&afu->contexts_lock);
+ mutex_init(&afu->afu_control_lock);
+ idr_init(&afu->contexts_idr);
+ afu->fn = fn;
+ ocxl_fn_get(fn);
+ return afu;
+}
+
+static void free_afu(struct ocxl_afu *afu)
+{
+ idr_destroy(&afu->contexts_idr);
+ ocxl_fn_put(afu->fn);
+ kfree(afu);
+}
+
+static void free_afu_dev(struct device *dev)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(dev);
+
+ ocxl_unregister_afu(afu);
+ free_afu(afu);
+}
+
+static int set_afu_device(struct ocxl_afu *afu, const char *location)
+{
+ struct ocxl_fn *fn = afu->fn;
+ int rc;
+
+ afu->dev.parent = &fn->dev;
+ afu->dev.release = free_afu_dev;
+ rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
+ afu->config.idx);
+ return rc;
+}
+
+static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+ struct ocxl_fn *fn = afu->fn;
+ int actag_count, actag_offset;
+
+ /*
+ * if there were not enough actags for the function, each afu
+ * reduces its count as well
+ */
+ actag_count = afu->config.actag_supported *
+ fn->actag_enabled / fn->actag_supported;
+ actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
+ if (actag_offset < 0) {
+ dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+ actag_count, actag_offset);
+ return actag_offset;
+ }
+ afu->actag_base = fn->actag_base + actag_offset;
+ afu->actag_enabled = actag_count;
+
+ ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
+ afu->actag_base, afu->actag_enabled);
+ dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+ afu->actag_base, afu->actag_enabled);
+ return 0;
+}
+
+static void reclaim_afu_actag(struct ocxl_afu *afu)
+{
+ struct ocxl_fn *fn = afu->fn;
+ int start_offset, size;
+
+ start_offset = afu->actag_base - fn->actag_base;
+ size = afu->actag_enabled;
+ ocxl_actag_afu_free(afu->fn, start_offset, size);
+}
+
+static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+ struct ocxl_fn *fn = afu->fn;
+ int pasid_count, pasid_offset;
+
+ /*
+ * We only support the case where the function configuration
+ * requested enough PASIDs to cover all AFUs.
+ */
+ pasid_count = 1 << afu->config.pasid_supported_log;
+ pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
+ if (pasid_offset < 0) {
+ dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+ pasid_count, pasid_offset);
+ return pasid_offset;
+ }
+ afu->pasid_base = fn->pasid_base + pasid_offset;
+ afu->pasid_count = 0;
+ afu->pasid_max = pasid_count;
+
+ ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
+ afu->pasid_base,
+ afu->config.pasid_supported_log);
+ dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
+ afu->pasid_base, pasid_count);
+ return 0;
+}
+
+static void reclaim_afu_pasid(struct ocxl_afu *afu)
+{
+ struct ocxl_fn *fn = afu->fn;
+ int start_offset, size;
+
+ start_offset = afu->pasid_base - fn->pasid_base;
+ size = 1 << afu->config.pasid_supported_log;
+ ocxl_pasid_afu_free(afu->fn, start_offset, size);
+}
+
+static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
+{
+ struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+ int rc, idx;
+
+ if (bar != 0 && bar != 2 && bar != 4)
+ return -EINVAL;
+
+ idx = bar >> 1;
+ if (fn->bar_used[idx]++ == 0) {
+ rc = pci_request_region(dev, bar, "ocxl");
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static void release_fn_bar(struct ocxl_fn *fn, int bar)
+{
+ struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+ int idx;
+
+ if (bar != 0 && bar != 2 && bar != 4)
+ return;
+
+ idx = bar >> 1;
+ if (--fn->bar_used[idx] == 0)
+ pci_release_region(dev, bar);
+ WARN_ON(fn->bar_used[idx] < 0);
+}
+
+static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+ int rc;
+
+ rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
+ if (rc)
+ return rc;
+
+ rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+ if (rc) {
+ release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+ return rc;
+ }
+
+ afu->global_mmio_start =
+ pci_resource_start(dev, afu->config.global_mmio_bar) +
+ afu->config.global_mmio_offset;
+ afu->pp_mmio_start =
+ pci_resource_start(dev, afu->config.pp_mmio_bar) +
+ afu->config.pp_mmio_offset;
+
+ afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
+ afu->config.global_mmio_size);
+ if (!afu->global_mmio_ptr) {
+ release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+ release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+ dev_err(&dev->dev, "Error mapping global mmio area\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Leave an empty page between the per-process mmio area and
+ * the AFU interrupt mappings
+ */
+ afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
+ return 0;
+}
+
+static void unmap_mmio_areas(struct ocxl_afu *afu)
+{
+ if (afu->global_mmio_ptr) {
+ iounmap(afu->global_mmio_ptr);
+ afu->global_mmio_ptr = NULL;
+ }
+ afu->global_mmio_start = 0;
+ afu->pp_mmio_start = 0;
+ release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+ release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+}
+
+static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
+{
+ int rc;
+
+ rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
+ if (rc)
+ return rc;
+
+ rc = set_afu_device(afu, dev_name(&dev->dev));
+ if (rc)
+ return rc;
+
+ rc = assign_afu_actag(afu, dev);
+ if (rc)
+ return rc;
+
+ rc = assign_afu_pasid(afu, dev);
+ if (rc) {
+ reclaim_afu_actag(afu);
+ return rc;
+ }
+
+ rc = map_mmio_areas(afu, dev);
+ if (rc) {
+ reclaim_afu_pasid(afu);
+ reclaim_afu_actag(afu);
+ return rc;
+ }
+ return 0;
+}
+
+static void deconfigure_afu(struct ocxl_afu *afu)
+{
+ unmap_mmio_areas(afu);
+ reclaim_afu_pasid(afu);
+ reclaim_afu_actag(afu);
+}
+
+static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
+{
+ int rc;
+
+ ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
+ /*
+ * Char device creation is the last step, as processes can
+ * call our driver immediately, so all our inits must be finished.
+ */
+ rc = ocxl_create_cdev(afu);
+ if (rc)
+ return rc;
+ return 0;
+}
+
+static void deactivate_afu(struct ocxl_afu *afu)
+{
+ struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+ ocxl_destroy_cdev(afu);
+ ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
+}
+
+static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
+{
+ int rc;
+ struct ocxl_afu *afu;
+
+ afu = alloc_afu(fn);
+ if (!afu)
+ return -ENOMEM;
+
+ rc = configure_afu(afu, afu_idx, dev);
+ if (rc) {
+ free_afu(afu);
+ return rc;
+ }
+
+ rc = ocxl_register_afu(afu);
+ if (rc)
+ goto err;
+
+ rc = ocxl_sysfs_add_afu(afu);
+ if (rc)
+ goto err;
+
+ rc = activate_afu(dev, afu);
+ if (rc)
+ goto err_sys;
+
+ list_add_tail(&afu->list, &fn->afu_list);
+ return 0;
+
+err_sys:
+ ocxl_sysfs_remove_afu(afu);
+err:
+ deconfigure_afu(afu);
+ device_unregister(&afu->dev);
+ return rc;
+}
+
+static void remove_afu(struct ocxl_afu *afu)
+{
+ list_del(&afu->list);
+ ocxl_context_detach_all(afu);
+ deactivate_afu(afu);
+ ocxl_sysfs_remove_afu(afu);
+ deconfigure_afu(afu);
+ device_unregister(&afu->dev);
+}
+
+static struct ocxl_fn *alloc_function(struct pci_dev *dev)
+{
+ struct ocxl_fn *fn;
+
+ fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
+ if (!fn)
+ return NULL;
+
+ INIT_LIST_HEAD(&fn->afu_list);
+ INIT_LIST_HEAD(&fn->pasid_list);
+ INIT_LIST_HEAD(&fn->actag_list);
+ return fn;
+}
+
+static void free_function(struct ocxl_fn *fn)
+{
+ WARN_ON(!list_empty(&fn->afu_list));
+ WARN_ON(!list_empty(&fn->pasid_list));
+ kfree(fn);
+}
+
+static void free_function_dev(struct device *dev)
+{
+ struct ocxl_fn *fn = to_ocxl_function(dev);
+
+ free_function(fn);
+}
+
+static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+ int rc;
+
+ fn->dev.parent = &dev->dev;
+ fn->dev.release = free_function_dev;
+ rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
+ if (rc)
+ return rc;
+ pci_set_drvdata(dev, fn);
+ return 0;
+}
+
+static int assign_function_actag(struct ocxl_fn *fn)
+{
+ struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+ u16 base, enabled, supported;
+ int rc;
+
+ rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
+ if (rc)
+ return rc;
+
+ fn->actag_base = base;
+ fn->actag_enabled = enabled;
+ fn->actag_supported = supported;
+
+ ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
+ fn->actag_base, fn->actag_enabled);
+ dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
+ fn->actag_base, fn->actag_enabled);
+ return 0;
+}
+
+static int set_function_pasid(struct ocxl_fn *fn)
+{
+ struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+ int rc, desired_count, max_count;
+
+ /* A function may not require any PASID */
+ if (fn->config.max_pasid_log < 0)
+ return 0;
+
+ rc = ocxl_config_get_pasid_info(dev, &max_count);
+ if (rc)
+ return rc;
+
+ desired_count = 1 << fn->config.max_pasid_log;
+
+ if (desired_count > max_count) {
+ dev_err(&fn->dev,
+ "Function requires more PASIDs than is available (%d vs. %d)\n",
+ desired_count, max_count);
+ return -ENOSPC;
+ }
+
+ fn->pasid_base = 0;
+ return 0;
+}
+
+static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+ int rc;
+
+ rc = pci_enable_device(dev);
+ if (rc) {
+ dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
+ return rc;
+ }
+
+ /*
+ * Once it has been confirmed to work on our hardware, we
+ * should reset the function, to force the adapter to restart
+ * from scratch.
+ * A function reset would also reset all its AFUs.
+ *
+ * Some hints for implementation:
+ *
+ * - there's not status bit to know when the reset is done. We
+ * should try reading the config space to know when it's
+ * done.
+ * - probably something like:
+ * Reset
+ * wait 100ms
+ * issue config read
+ * allow device up to 1 sec to return success on config
+ * read before declaring it broken
+ *
+ * Some shared logic on the card (CFG, TLX) won't be reset, so
+ * there's no guarantee that it will be enough.
+ */
+ rc = ocxl_config_read_function(dev, &fn->config);
+ if (rc)
+ return rc;
+
+ rc = set_function_device(fn, dev);
+ if (rc)
+ return rc;
+
+ rc = assign_function_actag(fn);
+ if (rc)
+ return rc;
+
+ rc = set_function_pasid(fn);
+ if (rc)
+ return rc;
+
+ rc = ocxl_link_setup(dev, 0, &fn->link);
+ if (rc)
+ return rc;
+
+ rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
+ if (rc) {
+ ocxl_link_release(dev, fn->link);
+ return rc;
+ }
+ return 0;
+}
+
+static void deconfigure_function(struct ocxl_fn *fn)
+{
+ struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+
+ ocxl_link_release(dev, fn->link);
+ pci_disable_device(dev);
+}
+
+static struct ocxl_fn *init_function(struct pci_dev *dev)
+{
+ struct ocxl_fn *fn;
+ int rc;
+
+ fn = alloc_function(dev);
+ if (!fn)
+ return ERR_PTR(-ENOMEM);
+
+ rc = configure_function(fn, dev);
+ if (rc) {
+ free_function(fn);
+ return ERR_PTR(rc);
+ }
+
+ rc = device_register(&fn->dev);
+ if (rc) {
+ deconfigure_function(fn);
+ device_unregister(&fn->dev);
+ return ERR_PTR(rc);
+ }
+ return fn;
+}
+
+static void remove_function(struct ocxl_fn *fn)
+{
+ deconfigure_function(fn);
+ device_unregister(&fn->dev);
+}
+
+static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ int rc, afu_count = 0;
+ u8 afu;
+ struct ocxl_fn *fn;
+
+ if (!radix_enabled()) {
+ dev_err(&dev->dev, "Unsupported memory model (hash)\n");
+ return -ENODEV;
+ }
+
+ fn = init_function(dev);
+ if (IS_ERR(fn)) {
+ dev_err(&dev->dev, "function init failed: %li\n",
+ PTR_ERR(fn));
+ return PTR_ERR(fn);
+ }
+
+ for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
+ rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
+ if (rc > 0) {
+ rc = init_afu(dev, fn, afu);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Can't initialize AFU index %d\n", afu);
+ continue;
+ }
+ afu_count++;
+ }
+ }
+ dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
+ return 0;
+}
+
+static void ocxl_remove(struct pci_dev *dev)
+{
+ struct ocxl_afu *afu, *tmp;
+ struct ocxl_fn *fn = pci_get_drvdata(dev);
+
+ list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
+ remove_afu(afu);
+ }
+ remove_function(fn);
+}
+
+struct pci_driver ocxl_pci_driver = {
+ .name = "ocxl",
+ .id_table = ocxl_pci_tbl,
+ .probe = ocxl_probe,
+ .remove = ocxl_remove,
+ .shutdown = ocxl_remove,
+};
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
new file mode 100644
index 000000000000..d9753a1db14b
--- /dev/null
+++ b/drivers/misc/ocxl/sysfs.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sysfs.h>
+#include "ocxl_internal.h"
+
+static ssize_t global_mmio_size_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ afu->config.global_mmio_size);
+}
+
+static ssize_t pp_mmio_size_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ afu->config.pp_mmio_stride);
+}
+
+static ssize_t afu_version_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
+ afu->config.version_major,
+ afu->config.version_minor);
+}
+
+static ssize_t contexts_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
+ afu->pasid_count, afu->pasid_max);
+}
+
+static struct device_attribute afu_attrs[] = {
+ __ATTR_RO(global_mmio_size),
+ __ATTR_RO(pp_mmio_size),
+ __ATTR_RO(afu_version),
+ __ATTR_RO(contexts),
+};
+
+static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+ if (count == 0 || off < 0 ||
+ off >= afu->config.global_mmio_size)
+ return 0;
+ memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
+ return count;
+}
+
+static int global_mmio_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ocxl_afu *afu = vma->vm_private_data;
+ unsigned long offset;
+
+ if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
+ return VM_FAULT_SIGBUS;
+
+ offset = vmf->pgoff;
+ offset += (afu->global_mmio_start >> PAGE_SHIFT);
+ vm_insert_pfn(vma, vmf->address, offset);
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct global_mmio_vmops = {
+ .fault = global_mmio_fault,
+};
+
+static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ struct vm_area_struct *vma)
+{
+ struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+ if ((vma_pages(vma) + vma->vm_pgoff) >
+ (afu->config.global_mmio_size >> PAGE_SHIFT))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_ops = &global_mmio_vmops;
+ vma->vm_private_data = afu;
+ return 0;
+}
+
+int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
+{
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+ rc = device_create_file(&afu->dev, &afu_attrs[i]);
+ if (rc)
+ goto err;
+ }
+
+ sysfs_attr_init(&afu->attr_global_mmio.attr);
+ afu->attr_global_mmio.attr.name = "global_mmio_area";
+ afu->attr_global_mmio.attr.mode = 0600;
+ afu->attr_global_mmio.size = afu->config.global_mmio_size;
+ afu->attr_global_mmio.read = global_mmio_read;
+ afu->attr_global_mmio.mmap = global_mmio_mmap;
+ rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
+ if (rc) {
+ dev_err(&afu->dev,
+ "Unable to create global mmio attr for afu: %d\n",
+ rc);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (i--; i >= 0; i--)
+ device_remove_file(&afu->dev, &afu_attrs[i]);
+ return rc;
+}
+
+void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
+ device_remove_file(&afu->dev, &afu_attrs[i]);
+ device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
+}
diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c
new file mode 100644
index 000000000000..1e6947049697
--- /dev/null
+++ b/drivers/misc/ocxl/trace.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
new file mode 100644
index 000000000000..bcb7ff330c1e
--- /dev/null
+++ b/drivers/misc/ocxl/trace.h
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ocxl
+
+#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OCXL_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(ocxl_context,
+ TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+ TP_ARGS(pid, spa, pasid, pidr, tidr),
+
+ TP_STRUCT__entry(
+ __field(pid_t, pid)
+ __field(void*, spa)
+ __field(int, pasid)
+ __field(u32, pidr)
+ __field(u32, tidr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ __entry->spa = spa;
+ __entry->pasid = pasid;
+ __entry->pidr = pidr;
+ __entry->tidr = tidr;
+ ),
+
+ TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x",
+ __entry->pid,
+ __entry->spa,
+ __entry->pasid,
+ __entry->pidr,
+ __entry->tidr
+ )
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_add,
+ TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+ TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_remove,
+ TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+ TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+TRACE_EVENT(ocxl_terminate_pasid,
+ TP_PROTO(int pasid, int rc),
+ TP_ARGS(pasid, rc),
+
+ TP_STRUCT__entry(
+ __field(int, pasid)
+ __field(int, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("pasid=0x%x rc=%d",
+ __entry->pasid,
+ __entry->rc
+ )
+);
+
+DECLARE_EVENT_CLASS(ocxl_fault_handler,
+ TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+ TP_ARGS(spa, pe, dsisr, dar, tfc),
+
+ TP_STRUCT__entry(
+ __field(void *, spa)
+ __field(u64, pe)
+ __field(u64, dsisr)
+ __field(u64, dar)
+ __field(u64, tfc)
+ ),
+
+ TP_fast_assign(
+ __entry->spa = spa;
+ __entry->pe = pe;
+ __entry->dsisr = dsisr;
+ __entry->dar = dar;
+ __entry->tfc = tfc;
+ ),
+
+ TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx",
+ __entry->spa,
+ __entry->pe,
+ __entry->dsisr,
+ __entry->dar,
+ __entry->tfc
+ )
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault,
+ TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+ TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
+ TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+ TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+TRACE_EVENT(ocxl_afu_irq_alloc,
+ TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq,
+ u64 irq_offset),
+ TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset),
+
+ TP_STRUCT__entry(
+ __field(int, pasid)
+ __field(int, irq_id)
+ __field(unsigned int, virq)
+ __field(int, hw_irq)
+ __field(u64, irq_offset)
+ ),
+
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ __entry->irq_id = irq_id;
+ __entry->virq = virq;
+ __entry->hw_irq = hw_irq;
+ __entry->irq_offset = irq_offset;
+ ),
+
+ TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx",
+ __entry->pasid,
+ __entry->irq_id,
+ __entry->virq,
+ __entry->hw_irq,
+ __entry->irq_offset
+ )
+);
+
+TRACE_EVENT(ocxl_afu_irq_free,
+ TP_PROTO(int pasid, int irq_id),
+ TP_ARGS(pasid, irq_id),
+
+ TP_STRUCT__entry(
+ __field(int, pasid)
+ __field(int, irq_id)
+ ),
+
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ __entry->irq_id = irq_id;
+ ),
+
+ TP_printk("pasid=0x%x irq_id=%d",
+ __entry->pasid,
+ __entry->irq_id
+ )
+);
+
+TRACE_EVENT(ocxl_afu_irq_receive,
+ TP_PROTO(int virq),
+ TP_ARGS(virq),
+
+ TP_STRUCT__entry(
+ __field(int, virq)
+ ),
+
+ TP_fast_assign(
+ __entry->virq = virq;
+ ),
+
+ TP_printk("virq=%d",
+ __entry->virq
+ )
+);
+
+#endif /* _TRACE_OCXL_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index a3449d717a99..fc01d7d807f3 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -27,6 +27,7 @@
#include <linux/mutex.h>
#include <asm/rtas.h>
#include <asm/vio.h>
+#include <linux/firmware.h>
#include "../pci.h"
#include "rpaphp.h"
@@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char *drc_name)
{
struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
struct device_node *dn = NULL;
- char *name;
int rc;
if (!parent)
return NULL;
while ((dn = of_get_next_child(parent, dn))) {
- rc = rpaphp_get_drc_props(dn, NULL, &name, NULL, NULL);
- if ((rc == 0) && (!strcmp(drc_name, name)))
+ rc = rpaphp_check_drc_props(dn, drc_name, NULL);
+ if (rc == 0)
break;
}
@@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char *drc_name,
char *drc_type)
{
struct device_node *np = NULL;
- char *name;
- char *type;
int rc;
while ((np = of_find_node_by_name(np, "pci"))) {
- rc = rpaphp_get_drc_props(np, NULL, &name, &type, NULL);
+ rc = rpaphp_check_drc_props(np, drc_name, drc_type);
if (rc == 0)
- if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
- break;
+ break;
}
return np;
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
index edb5d8a53020..b806314349cf 100644
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
+#include "rpaphp.h"
#include "rpadlpar.h"
#include "../pci.h"
@@ -27,8 +28,6 @@
#define ADD_SLOT_ATTR_NAME add_slot
#define REMOVE_SLOT_ATTR_NAME remove_slot
-#define MAX_DRC_NAME_LEN 64
-
static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t nbytes)
{
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7db024e68fe6..bdb844b01a3d 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -64,6 +64,10 @@ extern bool rpaphp_debug;
#define CONFIGURED 1
#define EMPTY 0
+/* DRC constants */
+
+#define MAX_DRC_NAME_LEN 64
+
/*
* struct slot - slot information for each *physical* slot
*/
@@ -91,8 +95,8 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state);
/* rpaphp_core.c */
int rpaphp_add_slot(struct device_node *dn);
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
- char **drc_name, char **drc_type, int *drc_power_domain);
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+ char *drc_type);
/* rpaphp_slot.c */
void dealloc_slot_struct(struct slot *slot);
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1e29abaaea08..53902c7c38f2 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -30,6 +30,7 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
+#include <asm/firmware.h>
#include <asm/eeh.h> /* for eeh_add_device() */
#include <asm/rtas.h> /* rtas_call */
#include <asm/pci-bridge.h> /* for pci_controller */
@@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, const int **drc_indexes,
return 0;
}
-/* To get the DRC props describing the current node, first obtain it's
- * my-drc-index property. Next obtain the DRC list from it's parent. Use
- * the my-drc-index for correlation, and obtain the requested properties.
+
+/* Verify the existence of 'drc_name' and/or 'drc_type' within the
+ * current node. First obtain it's my-drc-index property. Next,
+ * obtain the DRC info from it's parent. Use the my-drc-index for
+ * correlation, and obtain/validate the requested properties.
*/
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
- char **drc_name, char **drc_type, int *drc_power_domain)
+
+static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
+ char *drc_type, unsigned int my_index)
{
+ char *name_tmp, *type_tmp;
const int *indexes, *names;
const int *types, *domains;
- const unsigned int *my_index;
- char *name_tmp, *type_tmp;
int i, rc;
- my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
- if (!my_index) {
- /* Node isn't DLPAR/hotplug capable */
- return -EINVAL;
- }
-
rc = get_children_props(dn->parent, &indexes, &names, &types, &domains);
if (rc < 0) {
return -EINVAL;
@@ -225,24 +222,84 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
/* Iterate through parent properties, looking for my-drc-index */
for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
- if ((unsigned int) indexes[i + 1] == *my_index) {
- if (drc_name)
- *drc_name = name_tmp;
- if (drc_type)
- *drc_type = type_tmp;
- if (drc_index)
- *drc_index = be32_to_cpu(*my_index);
- if (drc_power_domain)
- *drc_power_domain = be32_to_cpu(domains[i+1]);
- return 0;
- }
+ if ((unsigned int) indexes[i + 1] == my_index)
+ break;
+
name_tmp += (strlen(name_tmp) + 1);
type_tmp += (strlen(type_tmp) + 1);
}
+ if (((drc_name == NULL) || (drc_name && !strcmp(drc_name, name_tmp))) &&
+ ((drc_type == NULL) || (drc_type && !strcmp(drc_type, type_tmp))))
+ return 0;
+
return -EINVAL;
}
-EXPORT_SYMBOL_GPL(rpaphp_get_drc_props);
+
+static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
+ char *drc_type, unsigned int my_index)
+{
+ struct property *info;
+ unsigned int entries;
+ struct of_drc_info drc;
+ const __be32 *value;
+ char cell_drc_name[MAX_DRC_NAME_LEN];
+ int j, fndit;
+
+ info = of_find_property(dn->parent, "ibm,drc-info", NULL);
+ if (info == NULL)
+ return -EINVAL;
+
+ value = of_prop_next_u32(info, NULL, &entries);
+ if (!value)
+ return -EINVAL;
+
+ for (j = 0; j < entries; j++) {
+ of_read_drc_info_cell(&info, &value, &drc);
+
+ /* Should now know end of current entry */
+
+ if (my_index > drc.last_drc_index)
+ continue;
+
+ fndit = 1;
+ break;
+ }
+ /* Found it */
+
+ if (fndit)
+ sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix,
+ my_index);
+
+ if (((drc_name == NULL) ||
+ (drc_name && !strcmp(drc_name, cell_drc_name))) &&
+ ((drc_type == NULL) ||
+ (drc_type && !strcmp(drc_type, drc.drc_type))))
+ return 0;
+
+ return -EINVAL;
+}
+
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+ char *drc_type)
+{
+ const unsigned int *my_index;
+
+ my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!my_index) {
+ /* Node isn't DLPAR/hotplug capable */
+ return -EINVAL;
+ }
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+ return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
+ *my_index);
+ else
+ return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
+ *my_index);
+}
+EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
+
static int is_php_type(char *drc_type)
{
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 6bacb8995e96..0b7b5da63d4e 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -636,6 +636,17 @@ void pci_restore_iov_state(struct pci_dev *dev)
}
/**
+ * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
+ * @dev: the PCI device
+ * @auto_probe: set VF drivers auto probe flag
+ */
+void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
+{
+ if (dev->is_physfn)
+ dev->sriov->drivers_autoprobe = auto_probe;
+}
+
+/**
* pci_iov_bus_range - find bus range used by Virtual Function
* @bus: the PCI bus
*
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 744805232155..8d7448063fd1 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data)
} else {
err_handler = dev->driver->err_handler;
vote = err_handler->error_detected(dev, result_data->state);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
}
result_data->result = merge_result(result_data->result, vote);
@@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data)
err_handler = dev->driver->err_handler;
err_handler->resume(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
out:
device_unlock(&dev->dev);
return 0;
@@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity)
return;
failed:
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
/* TODO: Should kernel panic here? */
dev_info(&dev->dev, "AER: Device recovery failed\n");
}
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 437fc35beb7b..e293606b0334 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -44,7 +44,6 @@ static struct ps3av {
struct mutex mutex;
struct work_struct work;
struct completion done;
- struct workqueue_struct *wq;
int open_count;
struct ps3_system_bus_device *dev;
@@ -485,7 +484,7 @@ static int ps3av_set_videomode(void)
ps3av_set_av_video_mute(PS3AV_CMD_MUTE_ON);
/* wake up ps3avd to do the actual video mode setting */
- queue_work(ps3av->wq, &ps3av->work);
+ schedule_work(&ps3av->work);
return 0;
}
@@ -956,11 +955,6 @@ static int ps3av_probe(struct ps3_system_bus_device *dev)
INIT_WORK(&ps3av->work, ps3avd);
init_completion(&ps3av->done);
complete(&ps3av->done);
- ps3av->wq = create_singlethread_workqueue("ps3avd");
- if (!ps3av->wq) {
- res = -ENOMEM;
- goto fail;
- }
switch (ps3_os_area_get_av_multi_out()) {
case PS3_PARAM_AV_MULTI_OUT_NTSC:
@@ -1018,8 +1012,7 @@ static int ps3av_remove(struct ps3_system_bus_device *dev)
dev_dbg(&dev->core, " -> %s:%d\n", __func__, __LINE__);
if (ps3av) {
ps3av_cmd_fin();
- if (ps3av->wq)
- destroy_workqueue(ps3av->wq);
+ flush_work(&ps3av->work);
kfree(ps3av);
ps3av = NULL;
}
diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index e2a946c0e667..304e891e35fc 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -58,6 +58,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
{
long rc = OPAL_BUSY;
+ int retries = 10;
u32 y_m_d;
u64 h_m_s_ms;
__be32 __y_m_d;
@@ -67,8 +68,11 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
- else
+ else if (retries-- && (rc == OPAL_HARDWARE
+ || rc == OPAL_INTERNAL_ERROR))
msleep(10);
+ else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+ break;
}
if (rc != OPAL_SUCCESS)
@@ -84,6 +88,7 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
{
long rc = OPAL_BUSY;
+ int retries = 10;
u32 y_m_d = 0;
u64 h_m_s_ms = 0;
@@ -92,8 +97,11 @@ static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
rc = opal_rtc_write(y_m_d, h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
- else
+ else if (retries-- && (rc == OPAL_HARDWARE
+ || rc == OPAL_INTERNAL_ERROR))
msleep(10);
+ else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+ break;
}
return rc == OPAL_SUCCESS ? 0 : -EIO;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0314e0716c30..ad35aac87971 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1965,6 +1965,7 @@ int pci_vfs_assigned(struct pci_dev *dev);
int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
int pci_sriov_get_totalvfs(struct pci_dev *dev);
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
+void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
#else
static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
{
@@ -1992,6 +1993,7 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
{ return 0; }
static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
{ return 0; }
+static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
#endif
#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
@@ -2279,6 +2281,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
return false;
}
+/**
+ * pci_uevent_ers - emit a uevent during recovery path of pci device
+ * @pdev: pci device to check
+ * @err_type: type of error event
+ *
+ */
+static inline void pci_uevent_ers(struct pci_dev *pdev,
+ enum pci_ers_result err_type)
+{
+ int idx = 0;
+ char *envp[3];
+
+ switch (err_type) {
+ case PCI_ERS_RESULT_NONE:
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY";
+ envp[idx++] = "DEVICE_ONLINE=0";
+ break;
+ case PCI_ERS_RESULT_RECOVERED:
+ envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY";
+ envp[idx++] = "DEVICE_ONLINE=1";
+ break;
+ case PCI_ERS_RESULT_DISCONNECT:
+ envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY";
+ envp[idx++] = "DEVICE_ONLINE=0";
+ break;
+ default:
+ break;
+ }
+
+ if (idx > 0) {
+ envp[idx++] = NULL;
+ kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp);
+ }
+}
+
/* provide the legacy pci_dma_* API */
#include <linux/pci-dma-compat.h>
diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h
new file mode 100644
index 000000000000..3526fa996a22
--- /dev/null
+++ b/include/misc/ocxl-config.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_CONFIG_H_
+#define _OCXL_CONFIG_H_
+
+/*
+ * This file lists the various constants used to read the
+ * configuration space of an opencapi adapter.
+ *
+ * It follows the specification for opencapi 3.0
+ */
+
+#define OCXL_EXT_CAP_ID_DVSEC 0x23
+
+#define OCXL_DVSEC_VENDOR_OFFSET 0x4
+#define OCXL_DVSEC_ID_OFFSET 0x8
+#define OCXL_DVSEC_TL_ID 0xF000
+#define OCXL_DVSEC_TL_BACKOFF_TIMERS 0x10
+#define OCXL_DVSEC_TL_RECV_CAP 0x18
+#define OCXL_DVSEC_TL_SEND_CAP 0x20
+#define OCXL_DVSEC_TL_RECV_RATE 0x30
+#define OCXL_DVSEC_TL_SEND_RATE 0x50
+#define OCXL_DVSEC_FUNC_ID 0xF001
+#define OCXL_DVSEC_FUNC_OFF_INDEX 0x08
+#define OCXL_DVSEC_FUNC_OFF_ACTAG 0x0C
+#define OCXL_DVSEC_AFU_INFO_ID 0xF003
+#define OCXL_DVSEC_AFU_INFO_AFU_IDX 0x0A
+#define OCXL_DVSEC_AFU_INFO_OFF 0x0C
+#define OCXL_DVSEC_AFU_INFO_DATA 0x10
+#define OCXL_DVSEC_AFU_CTRL_ID 0xF004
+#define OCXL_DVSEC_AFU_CTRL_AFU_IDX 0x0A
+#define OCXL_DVSEC_AFU_CTRL_TERM_PASID 0x0C
+#define OCXL_DVSEC_AFU_CTRL_ENABLE 0x0F
+#define OCXL_DVSEC_AFU_CTRL_PASID_SUP 0x10
+#define OCXL_DVSEC_AFU_CTRL_PASID_EN 0x11
+#define OCXL_DVSEC_AFU_CTRL_PASID_BASE 0x14
+#define OCXL_DVSEC_AFU_CTRL_ACTAG_SUP 0x18
+#define OCXL_DVSEC_AFU_CTRL_ACTAG_EN 0x1A
+#define OCXL_DVSEC_AFU_CTRL_ACTAG_BASE 0x1C
+#define OCXL_DVSEC_VENDOR_ID 0xF0F0
+#define OCXL_DVSEC_VENDOR_CFG_VERS 0x0C
+#define OCXL_DVSEC_VENDOR_TLX_VERS 0x10
+#define OCXL_DVSEC_VENDOR_DLX_VERS 0x20
+
+#endif /* _OCXL_CONFIG_H_ */
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
new file mode 100644
index 000000000000..51ccf76db293
--- /dev/null
+++ b/include/misc/ocxl.h
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _MISC_OCXL_H_
+#define _MISC_OCXL_H_
+
+#include <linux/pci.h>
+
+/*
+ * Opencapi drivers all need some common facilities, like parsing the
+ * device configuration space, adding a Process Element to the Shared
+ * Process Area, etc...
+ *
+ * The ocxl module provides a kernel API, to allow other drivers to
+ * reuse common code. A bit like a in-kernel library.
+ */
+
+#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */
+
+/*
+ * The following 2 structures are a fairly generic way of representing
+ * the configuration data for a function and AFU, as read from the
+ * configuration space.
+ */
+struct ocxl_afu_config {
+ u8 idx;
+ int dvsec_afu_control_pos; /* offset of AFU control DVSEC */
+ char name[OCXL_AFU_NAME_SZ];
+ u8 version_major;
+ u8 version_minor;
+ u8 afuc_type;
+ u8 afum_type;
+ u8 profile;
+ u8 global_mmio_bar; /* global MMIO area */
+ u64 global_mmio_offset;
+ u32 global_mmio_size;
+ u8 pp_mmio_bar; /* per-process MMIO area */
+ u64 pp_mmio_offset;
+ u32 pp_mmio_stride;
+ u8 log_mem_size;
+ u8 pasid_supported_log;
+ u16 actag_supported;
+};
+
+struct ocxl_fn_config {
+ int dvsec_tl_pos; /* offset of the Transaction Layer DVSEC */
+ int dvsec_function_pos; /* offset of the Function DVSEC */
+ int dvsec_afu_info_pos; /* offset of the AFU information DVSEC */
+ s8 max_pasid_log;
+ s8 max_afu_index;
+};
+
+/*
+ * Read the configuration space of a function and fill in a
+ * ocxl_fn_config structure with all the function details
+ */
+extern int ocxl_config_read_function(struct pci_dev *dev,
+ struct ocxl_fn_config *fn);
+
+/*
+ * Check if an AFU index is valid for the given function.
+ *
+ * AFU indexes can be sparse, so a driver should check all indexes up
+ * to the maximum found in the function description
+ */
+extern int ocxl_config_check_afu_index(struct pci_dev *dev,
+ struct ocxl_fn_config *fn, int afu_idx);
+
+/*
+ * Read the configuration space of a function for the AFU specified by
+ * the index 'afu_idx'. Fills in a ocxl_afu_config structure
+ */
+extern int ocxl_config_read_afu(struct pci_dev *dev,
+ struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu,
+ u8 afu_idx);
+
+/*
+ * Get the max PASID value that can be used by the function
+ */
+extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
+
+/*
+ * Tell an AFU, by writing in the configuration space, the PASIDs that
+ * it can use. Range starts at 'pasid_base' and its size is a multiple
+ * of 2
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC which
+ * can be found in the function configuration
+ */
+extern void ocxl_config_set_afu_pasid(struct pci_dev *dev,
+ int afu_control_offset,
+ int pasid_base, u32 pasid_count_log);
+
+/*
+ * Get the actag configuration for the function:
+ * 'base' is the first actag value that can be used.
+ * 'enabled' it the number of actags available, starting from base.
+ * 'supported' is the total number of actags desired by all the AFUs
+ * of the function.
+ */
+extern int ocxl_config_get_actag_info(struct pci_dev *dev,
+ u16 *base, u16 *enabled, u16 *supported);
+
+/*
+ * Tell a function, by writing in the configuration space, the actags
+ * it can use.
+ *
+ * 'func_offset' is the offset of the Function DVSEC that can found in
+ * the function configuration
+ */
+extern void ocxl_config_set_actag(struct pci_dev *dev, int func_offset,
+ u32 actag_base, u32 actag_count);
+
+/*
+ * Tell an AFU, by writing in the configuration space, the actags it
+ * can use.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern void ocxl_config_set_afu_actag(struct pci_dev *dev,
+ int afu_control_offset,
+ int actag_base, int actag_count);
+
+/*
+ * Enable/disable an AFU, by writing in the configuration space.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern void ocxl_config_set_afu_state(struct pci_dev *dev,
+ int afu_control_offset, int enable);
+
+/*
+ * Set the Transaction Layer configuration in the configuration space.
+ * Only needed for function 0.
+ *
+ * It queries the host TL capabilities, find some common ground
+ * between the host and device, and set the Transaction Layer on both
+ * accordingly.
+ */
+extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
+
+/*
+ * Request an AFU to terminate a PASID.
+ * Will return once the AFU has acked the request, or an error in case
+ * of timeout.
+ *
+ * The hardware can only terminate one PASID at a time, so caller must
+ * guarantee some kind of serialization.
+ *
+ * 'afu_control_offset' is the offset of the AFU control DVSEC for the
+ * desired AFU. It can be found in the AFU configuration
+ */
+extern int ocxl_config_terminate_pasid(struct pci_dev *dev,
+ int afu_control_offset, int pasid);
+
+/*
+ * Set up the opencapi link for the function.
+ *
+ * When called for the first time for a link, it sets up the Shared
+ * Process Area for the link and the interrupt handler to process
+ * translation faults.
+ *
+ * Returns a 'link handle' that should be used for further calls for
+ * the link
+ */
+extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask,
+ void **link_handle);
+
+/*
+ * Remove the association between the function and its link.
+ */
+extern void ocxl_link_release(struct pci_dev *dev, void *link_handle);
+
+/*
+ * Add a Process Element to the Shared Process Area for a link.
+ * The process is defined by its PASID, pid, tid and its mm_struct.
+ *
+ * 'xsl_err_cb' is an optional callback if the driver wants to be
+ * notified when the translation fault interrupt handler detects an
+ * address error.
+ * 'xsl_err_data' is an argument passed to the above callback, if
+ * defined
+ */
+extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+ u64 amr, struct mm_struct *mm,
+ void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+ void *xsl_err_data);
+
+/*
+ * Remove a Process Element from the Shared Process Area for a link
+ */
+extern int ocxl_link_remove_pe(void *link_handle, int pasid);
+
+/*
+ * Allocate an AFU interrupt associated to the link.
+ *
+ * 'hw_irq' is the hardware interrupt number
+ * 'obj_handle' is the 64-bit object handle to be passed to the AFU to
+ * trigger the interrupt.
+ * On P9, 'obj_handle' is an address, which, if written, triggers the
+ * interrupt. It is an MMIO address which needs to be remapped (one
+ * page).
+ */
+extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
+ u64 *obj_handle);
+
+/*
+ * Free a previously allocated AFU interrupt
+ */
+extern void ocxl_link_free_irq(void *link_handle, int hw_irq);
+
+#endif /* _MISC_OCXL_H_ */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index bb6836986200..3bf73fb58045 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -396,6 +396,7 @@ typedef struct elf64_shdr {
#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index 49e8fd08855a..56376d3907d8 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -20,20 +20,22 @@ struct cxl_ioctl_start_work {
__u64 work_element_descriptor;
__u64 amr;
__s16 num_interrupts;
- __s16 reserved1;
- __s32 reserved2;
+ __u16 tid;
+ __s32 reserved1;
+ __u64 reserved2;
__u64 reserved3;
__u64 reserved4;
__u64 reserved5;
- __u64 reserved6;
};
#define CXL_START_WORK_AMR 0x0000000000000001ULL
#define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL
#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL
+#define CXL_START_WORK_TID 0x0000000000000008ULL
#define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\
CXL_START_WORK_NUM_IRQS |\
- CXL_START_WORK_ERR_FF)
+ CXL_START_WORK_ERR_FF |\
+ CXL_START_WORK_TID)
/* Possible modes that an afu can be in */
diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
new file mode 100644
index 000000000000..4b0b0b756f3e
--- /dev/null
+++ b/include/uapi/misc/ocxl.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2017 IBM Corp. */
+#ifndef _UAPI_MISC_OCXL_H
+#define _UAPI_MISC_OCXL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+enum ocxl_event_type {
+ OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0,
+};
+
+#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001 /* This is the last event pending */
+
+struct ocxl_kernel_event_header {
+ __u16 type;
+ __u16 flags;
+ __u32 reserved;
+};
+
+struct ocxl_kernel_event_xsl_fault_error {
+ __u64 addr;
+ __u64 dsisr;
+ __u64 count;
+ __u64 reserved;
+};
+
+struct ocxl_ioctl_attach {
+ __u64 amr;
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+};
+
+struct ocxl_ioctl_irq_fd {
+ __u64 irq_offset;
+ __s32 eventfd;
+ __u32 reserved;
+};
+
+/* ioctl numbers */
+#define OCXL_MAGIC 0xCA
+/* AFU devices */
+#define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)
+#define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64)
+#define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64)
+#define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
+
+#endif /* _UAPI_MISC_OCXL_H */
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 16b22004e75f..083a48a008b4 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -1,4 +1,5 @@
-TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
+TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \
+ paste_last_unaligned alignment_handler
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644
index 000000000000..39fd362415cf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -0,0 +1,491 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ * - Any FIXMEs below
+ * - Test VSX regs < 32 and > 32
+ * - Test all loads and stores
+ * - Check update forms do update register
+ * - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+ struct ucontext *ucp = ctx;
+
+ if (!testing) {
+ signal(sig, SIG_DFL);
+ kill(0, sig);
+ }
+ gotsig = sig;
+#ifdef __powerpc64__
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+ ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n) " " #reg " ,0(%"#n") ;"
+
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ #ld_op form(ld_reg, 0) \
+ #st_op form(st_reg, 1) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+ char *c = dst;
+ int i;
+
+ c += offset;
+
+ for (i = 0 ; i < width ; i++)
+ c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+ void (*test_func)(char *, char *))
+{
+ char *s, *d;
+
+ s = src;
+ s += offset;
+ d = dst;
+ d += offset;
+
+ assert(size == 16);
+ gotsig = 0;
+ testing = 1;
+
+ test_func(s, d); /* run the actual test */
+
+ testing = 0;
+ if (gotsig) {
+ if (debug)
+ printf(" Got signal %i\n", gotsig);
+ return 1;
+ }
+ return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+ int i;
+
+ printf(" %s: unexpected result:\n", test_name);
+ printf(" mem:");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s1[i]);
+ printf("\n");
+ printf(" ci: ");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s2[i]);
+ printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+ char *s1c, *s2c;
+
+ s1c = s1;
+ s1c += offset;
+ s2c = s2;
+ s2c += offset;
+
+ if (memcmp(s1c, s2c, n)) {
+ if (debug) {
+ printf("\n Compare failed. Offset:%i length:%i\n",
+ offset, n);
+ dumpdata(s1c, s2c, n, test_name);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions. One cachable
+ * memory and the other doesn't.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+ int offset, width, fd, rc = 0, r;
+ void *mem0, *mem1, *ci0, *ci1;
+
+ printf("\tDoing %s:\t", test_name);
+
+ fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0) {
+ printf("\n");
+ perror("Can't open /dev/fb0");
+ SKIP_IF(1);
+ }
+
+ ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, 0x0);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, bufsize);
+ if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+ printf("\n");
+ perror("mmap failed");
+ SKIP_IF(1);
+ }
+
+ rc = posix_memalign(&mem0, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ return rc;
+ }
+
+ rc = posix_memalign(&mem1, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ free(mem0);
+ return rc;
+ }
+
+ /* offset = 0 no alignment fault, so skip */
+ for (offset = 1; offset < 16; offset++) {
+ width = 16; /* vsx == 16 bytes */
+ r = 0;
+
+ /* load pattern into memory byte by byte */
+ preload_data(ci0, offset, width);
+ preload_data(mem0, offset, width); // FIXME: remove??
+ memcpy(ci0, mem0, bufsize);
+ memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+ /* sanity check */
+ test_memcmp(mem0, ci0, width, offset, test_name);
+
+ r |= test_memcpy(ci1, ci0, width, offset, test_func);
+ r |= test_memcpy(mem1, mem0, width, offset, test_func);
+ if (r && !debug) {
+ printf("FAILED: Got signal");
+ break;
+ }
+
+ r |= test_memcmp(mem1, ci1, width, offset, test_name);
+ rc |= r;
+ if (r && !debug) {
+ printf("FAILED: Wrong Data");
+ break;
+ }
+ }
+ if (!r)
+ printf("PASSED");
+ printf("\n");
+
+ munmap(ci0, bufsize);
+ munmap(ci1, bufsize);
+ free(mem0);
+ free(mem1);
+
+ return rc;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+ int rc = 0;
+
+ printf("VSX: 2.06B\n");
+ LOAD_VSX_XFORM_TEST(lxvd2x);
+ LOAD_VSX_XFORM_TEST(lxvw4x);
+ LOAD_VSX_XFORM_TEST(lxsdx);
+ LOAD_VSX_XFORM_TEST(lxvdsx);
+ STORE_VSX_XFORM_TEST(stxvd2x);
+ STORE_VSX_XFORM_TEST(stxvw4x);
+ STORE_VSX_XFORM_TEST(stxsdx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+ int rc = 0;
+
+ printf("VSX: 2.07B\n");
+ LOAD_VSX_XFORM_TEST(lxsspx);
+ LOAD_VSX_XFORM_TEST(lxsiwax);
+ LOAD_VSX_XFORM_TEST(lxsiwzx);
+ STORE_VSX_XFORM_TEST(stxsspx);
+ STORE_VSX_XFORM_TEST(stxsiwx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+ printf("VSX: 3.00B\n");
+ LOAD_VMX_DFORM_TEST(lxsd);
+ LOAD_VSX_XFORM_TEST(lxsibzx);
+ LOAD_VSX_XFORM_TEST(lxsihzx);
+ LOAD_VMX_DFORM_TEST(lxssp);
+ LOAD_VSX_DFORM_TEST(lxv);
+ LOAD_VSX_XFORM_TEST(lxvb16x);
+ LOAD_VSX_XFORM_TEST(lxvh8x);
+ LOAD_VSX_XFORM_TEST(lxvx);
+ LOAD_VSX_XFORM_TEST(lxvwsx);
+ LOAD_VSX_XFORM_TEST(lxvl);
+ LOAD_VSX_XFORM_TEST(lxvll);
+ STORE_VMX_DFORM_TEST(stxsd);
+ STORE_VSX_XFORM_TEST(stxsibx);
+ STORE_VSX_XFORM_TEST(stxsihx);
+ STORE_VMX_DFORM_TEST(stxssp);
+ STORE_VSX_DFORM_TEST(stxv);
+ STORE_VSX_XFORM_TEST(stxvb16x);
+ STORE_VSX_XFORM_TEST(stxvh8x);
+ STORE_VSX_XFORM_TEST(stxvx);
+ STORE_VSX_XFORM_TEST(stxvl);
+ STORE_VSX_XFORM_TEST(stxvll);
+ return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+ int rc = 0;
+
+ printf("Integer\n");
+ LOAD_DFORM_TEST(lbz);
+ LOAD_DFORM_TEST(lbzu);
+ LOAD_XFORM_TEST(lbzx);
+ LOAD_XFORM_TEST(lbzux);
+ LOAD_DFORM_TEST(lhz);
+ LOAD_DFORM_TEST(lhzu);
+ LOAD_XFORM_TEST(lhzx);
+ LOAD_XFORM_TEST(lhzux);
+ LOAD_DFORM_TEST(lha);
+ LOAD_DFORM_TEST(lhau);
+ LOAD_XFORM_TEST(lhax);
+ LOAD_XFORM_TEST(lhaux);
+ LOAD_XFORM_TEST(lhbrx);
+ LOAD_DFORM_TEST(lwz);
+ LOAD_DFORM_TEST(lwzu);
+ LOAD_XFORM_TEST(lwzx);
+ LOAD_XFORM_TEST(lwzux);
+ LOAD_DFORM_TEST(lwa);
+ LOAD_XFORM_TEST(lwax);
+ LOAD_XFORM_TEST(lwaux);
+ LOAD_XFORM_TEST(lwbrx);
+ LOAD_DFORM_TEST(ld);
+ LOAD_DFORM_TEST(ldu);
+ LOAD_XFORM_TEST(ldx);
+ LOAD_XFORM_TEST(ldux);
+ LOAD_XFORM_TEST(ldbrx);
+ LOAD_DFORM_TEST(lmw);
+ STORE_DFORM_TEST(stb);
+ STORE_XFORM_TEST(stbx);
+ STORE_DFORM_TEST(stbu);
+ STORE_XFORM_TEST(stbux);
+ STORE_DFORM_TEST(sth);
+ STORE_XFORM_TEST(sthx);
+ STORE_DFORM_TEST(sthu);
+ STORE_XFORM_TEST(sthux);
+ STORE_XFORM_TEST(sthbrx);
+ STORE_DFORM_TEST(stw);
+ STORE_XFORM_TEST(stwx);
+ STORE_DFORM_TEST(stwu);
+ STORE_XFORM_TEST(stwux);
+ STORE_XFORM_TEST(stwbrx);
+ STORE_DFORM_TEST(std);
+ STORE_XFORM_TEST(stdx);
+ STORE_DFORM_TEST(stdu);
+ STORE_XFORM_TEST(stdux);
+ STORE_XFORM_TEST(stdbrx);
+ STORE_DFORM_TEST(stmw);
+ return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+ int rc = 0;
+
+ printf("VMX\n");
+ LOAD_VMX_XFORM_TEST(lvx);
+
+ /*
+ * FIXME: These loads only load part of the register, so our
+ * testing method doesn't work. Also they don't take alignment
+ * faults, so it's kinda pointless anyway
+ *
+ LOAD_VMX_XFORM_TEST(lvebx)
+ LOAD_VMX_XFORM_TEST(lvehx)
+ LOAD_VMX_XFORM_TEST(lvewx)
+ LOAD_VMX_XFORM_TEST(lvxl)
+ */
+ STORE_VMX_XFORM_TEST(stvx);
+ STORE_VMX_XFORM_TEST(stvebx);
+ STORE_VMX_XFORM_TEST(stvehx);
+ STORE_VMX_XFORM_TEST(stvewx);
+ STORE_VMX_XFORM_TEST(stvxl);
+ return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+ int rc = 0;
+
+ printf("Floating point\n");
+ LOAD_FLOAT_DFORM_TEST(lfd);
+ LOAD_FLOAT_XFORM_TEST(lfdx);
+ LOAD_FLOAT_DFORM_TEST(lfdp);
+ LOAD_FLOAT_XFORM_TEST(lfdpx);
+ LOAD_FLOAT_DFORM_TEST(lfdu);
+ LOAD_FLOAT_XFORM_TEST(lfdux);
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_XFORM_TEST(lfsx);
+ LOAD_FLOAT_DFORM_TEST(lfsu);
+ LOAD_FLOAT_XFORM_TEST(lfsux);
+ LOAD_FLOAT_XFORM_TEST(lfiwzx);
+ LOAD_FLOAT_XFORM_TEST(lfiwax);
+ STORE_FLOAT_DFORM_TEST(stfd);
+ STORE_FLOAT_XFORM_TEST(stfdx);
+ STORE_FLOAT_DFORM_TEST(stfdp);
+ STORE_FLOAT_XFORM_TEST(stfdpx);
+ STORE_FLOAT_DFORM_TEST(stfdu);
+ STORE_FLOAT_XFORM_TEST(stfdux);
+ STORE_FLOAT_DFORM_TEST(stfs);
+ STORE_FLOAT_XFORM_TEST(stfsx);
+ STORE_FLOAT_DFORM_TEST(stfsu);
+ STORE_FLOAT_XFORM_TEST(stfsux);
+ STORE_FLOAT_XFORM_TEST(stfiwx);
+
+ return rc;
+}
+
+void usage(char *prog)
+{
+ printf("Usage: %s [options]\n", prog);
+ printf(" -d Enable debug error output\n");
+ printf("\n");
+ printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+ printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+ struct sigaction sa;
+ int rc = 0;
+ int option = 0;
+
+ while ((option = getopt(argc, argv, "d")) != -1) {
+ switch (option) {
+ case 'd':
+ debug++;
+ break;
+ default:
+ usage(argv[0]);
+ exit(1);
+ }
+ }
+
+ bufsize = getpagesize();
+
+ sa.sa_sigaction = sighandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL) == -1
+ || sigaction(SIGBUS, &sa, NULL) == -1
+ || sigaction(SIGILL, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ rc |= test_harness(test_alignment_handler_vsx_206,
+ "test_alignment_handler_vsx_206");
+ rc |= test_harness(test_alignment_handler_vsx_207,
+ "test_alignment_handler_vsx_207");
+ rc |= test_harness(test_alignment_handler_vsx_300,
+ "test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_integer,
+ "test_alignment_handler_integer");
+ rc |= test_harness(test_alignment_handler_vmx,
+ "test_alignment_handler_vmx");
+ rc |= test_harness(test_alignment_handler_fp,
+ "test_alignment_handler_fp");
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
index 8d084a2d6e74..7a0a462a2272 100644
--- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -7,17 +7,34 @@
#include <stdlib.h>
#include <sys/mman.h>
#include <time.h>
+#include <getopt.h>
#include "utils.h"
#define ITERATIONS 5000000
-#define MEMSIZE (128 * 1024 * 1024)
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+ { "pgfault", no_argument, &pg_fault, 1 },
+ { "iterations", required_argument, 0, 'i' },
+ { 0, },
+};
+
+static void usage(void)
+{
+ printf("mmap_bench <--pgfault> <--iterations count>\n");
+}
int test_mmap(void)
{
struct timespec ts_start, ts_end;
- unsigned long i = ITERATIONS;
+ unsigned long i = iterations;
clock_gettime(CLOCK_MONOTONIC, &ts_start);
@@ -25,6 +42,11 @@ int test_mmap(void)
char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
FAIL_IF(c == MAP_FAILED);
+ if (pg_fault) {
+ int count;
+ for (count = 0; count < CHUNK_COUNT; count++)
+ c[count << 16] = 'c';
+ }
munmap(c, MEMSIZE);
}
@@ -35,7 +57,32 @@ int test_mmap(void)
return 0;
}
-int main(void)
+int main(int argc, char *argv[])
{
+ signed char c;
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ default:
+ usage();
+ exit(1);
+ }
+ }
return test_harness(test_mmap, "mmap_bench");
}
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index e715a3f2fbf4..7d7c42ed6de9 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,4 +1,5 @@
hugetlb_vs_thp_test
subpage_prot
tempfile
-prot_sao \ No newline at end of file
+prot_sao
+segv_errors \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index bf315bcbe663..8ebbe96d80a8 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
TEST_GEN_FILES := tempfile
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644
index 000000000000..06ae76ee3ea1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/segv_errors.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ faulted = true;
+ si_code = info->si_code;
+ regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+ struct sigaction act = {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ char c, *p = NULL;
+
+ p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ faulted = false;
+ si_code = 0;
+
+ /*
+ * We just need a compiler barrier, but mb() works and has the nice
+ * property of being easy to spot in the disassembly.
+ */
+ mb();
+ c = *p;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ faulted = false;
+ si_code = 0;
+
+ mb();
+ *p = c;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_segv_errors, "segv_errors");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 0df3c23b7888..277dade1b382 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -79,8 +79,8 @@ trans:
: [res] "=r" (result), [texasr] "=r" (texasr)
: [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
[sprn_texasr] "i" (SPRN_TEXASR)
- : "memory", "r0", "r1", "r2", "r3", "r4",
- "r8", "r9", "r10", "r11"
+ : "memory", "r0", "r1", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11"
);
if (result) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 94e57cb89769..51427a2465f6 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -76,8 +76,7 @@ trans:
: [tfhar] "=r" (tfhar), [res] "=r" (result),
[texasr] "=r" (texasr), [cptr1] "=r" (cptr1)
: [sprn_texasr] "i" (SPRN_TEXASR)
- : "memory", "r0", "r1", "r2", "r3", "r4",
- "r8", "r9", "r10", "r11", "r31"
+ : "memory", "r0", "r8", "r31"
);
/* There are 2 32bit instructions before tbegin. */
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index b4081e2b22d5..17c23cabac3e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -67,7 +67,7 @@ trans:
: [res] "=r" (result), [texasr] "=r" (texasr)
: [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
[sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "r" (&cptr[1])
- : "memory", "r0", "r1", "r2", "r3", "r4",
+ : "memory", "r0", "r1", "r3", "r4",
"r7", "r8", "r9", "r10", "r11"
);
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 241a4a4ee0e4..bb90d4b79524 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -13,3 +13,4 @@ tm-signal-context-chk-vmx
tm-signal-context-chk-vsx
tm-vmx-unavail
tm-unavailable
+tm-trap
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 8ed6f8c57230..a23453943ad2 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
tm-signal-context-chk-vmx tm-signal-context-chk-vsx
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
- tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \
+ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS)
include ../../lib.mk
@@ -18,6 +18,7 @@ $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644
index 000000000000..5d92c23ee6cb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * thread as an illegal instruction in following sequence:
+ * tbegin.
+ * beq 1f
+ * trap
+ * tend.
+ * 1: nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check it on BE machines because MSR.LE bit is set
+ * to zero which determines a BE endianness that is the native
+ * endianness on BE machines, so nothing notably critical happens,
+ * i.e. no illegal instruction is observed immediately after returning
+ * from the signal handler (as it happens on LE machines). Thus to test
+ * it on BE machines LE endianness is forced after a first trap and then
+ * the endianness is verified on subsequent traps to determine if the
+ * endianness "flipped back" to the native endianness (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+ error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define MSR_LE 1UL
+#define LE 1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+ ucontext_t *ucp = uc;
+ uint64_t thread_endianness;
+
+ /* Get thread endianness: extract bit LE from MSR */
+ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+ /***
+ * Little-Endian Machine
+ */
+
+ if (le) {
+ /* First trap event */
+ if (trap_event == 0) {
+ /* Do nothing. Since it is returning from this trap
+ * event that endianness is flipped by the bug, so just
+ * let the process return from the signal handler and
+ * check on the second trap event if endianness is
+ * flipped or not.
+ */
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Since trap was caught in TM on first trap event, if
+ * endianness was still LE (not flipped inadvertently)
+ * after returning from the signal handler instruction
+ * (1) is executed (basically a 'nop'), as it's located
+ * at address of tbegin. +4 (rollback addr). As (1) on
+ * LE endianness does in effect nothing, instruction (2)
+ * is then executed again as 'trap', generating a second
+ * trap event (note that in that case 'trap' is caught
+ * not in transacional mode). On te other hand, if after
+ * the return from the signal handler the endianness in-
+ * advertently flipped, instruction (1) is tread as a
+ * branch instruction, i.e. b .+8, hence instruction (3)
+ * and (4) are executed (tbegin.; trap;) and we get sim-
+ * ilaly on the trap signal handler, but now in TM mode.
+ * Either way, it's now possible to check the MSR LE bit
+ * once in the trap handler to verify if endianness was
+ * flipped or not after the return from the second trap
+ * event. If endianness is flipped, the bug is present.
+ * Finally, getting a trap in TM mode or not is just
+ * worth noting because it affects the math to determine
+ * the offset added to the NIP on return: the NIP for a
+ * trap caught in TM is the rollback address, i.e. the
+ * next instruction after 'tbegin.', whilst the NIP for
+ * a trap caught in non-transactional mode is the very
+ * same address of the 'trap' instruction that generated
+ * the trap event.
+ */
+
+ if (thread_endianness == LE) {
+ /* Go to 'success', i.e. instruction (6) */
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+ } else {
+ /*
+ * Thread endianness is BE, so it flipped
+ * inadvertently. Thus we flip back to LE and
+ * set NIP to go to 'failure', instruction (5).
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
+ }
+ }
+
+ /***
+ * Big-Endian Machine
+ */
+
+ else {
+ /* First trap event */
+ if (trap_event == 0) {
+ /*
+ * Force thread endianness to be LE. Instructions (1),
+ * (3), and (4) will be executed, generating a second
+ * trap in TM mode.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Do nothing. If bug is present on return from this
+ * second trap event endianness will flip back "automat-
+ * ically" to BE, otherwise thread endianness will
+ * continue to be LE, just as it was set above.
+ */
+ }
+ /* A third trap event */
+ else {
+ /*
+ * Once here it means that after returning from the sec-
+ * ond trap event instruction (4) (trap) was executed
+ * as LE, generating a third trap event. In that case
+ * endianness is still LE as set on return from the
+ * first trap event, hence no bug. Otherwise, bug
+ * flipped back to BE on return from the second trap
+ * event and instruction (4) was executed as 'tdi' (so
+ * basically a 'nop') and branch to 'failure' in
+ * instruction (5) was taken to indicate failure and we
+ * never get here.
+ */
+
+ /*
+ * Flip back to BE and go to instruction (6), i.e. go to
+ * 'success'.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+ }
+ }
+
+ trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+ /* Got a USR1 signal from ping(), so just tell pong() to exit */
+ exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+ uint64_t i;
+
+ trap_event = 0;
+
+ /*
+ * Wait an amount of context switches so load_fp and load_vec overflows
+ * and MSR_[FP|VEC|V] is 0.
+ */
+ for (i = 0; i < 1024*1024*512; i++)
+ ;
+
+ asm goto(
+ /*
+ * [NA] means "Native Endianness", i.e. it tells how a
+ * instruction is executed on machine's native endianness (in
+ * other words, native endianness matches kernel endianness).
+ * [OP] means "Opposite Endianness", i.e. on a BE machine, it
+ * tells how a instruction is executed as a LE instruction; con-
+ * versely, on a LE machine, it tells how a instruction is
+ * executed as a BE instruction. When [NA] is omitted, it means
+ * that the native interpretation of a given instruction is not
+ * relevant for the test. Likewise when [OP] is omitted.
+ */
+
+ " tbegin. ;" /* (0) tbegin. [NA] */
+ " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */
+ " trap ;" /* (2) trap [NA] */
+ ".long 0x1D05007C;" /* (3) tbegin. [OP] */
+ ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */
+ " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */
+ " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+ : : : : failure, success);
+
+failure:
+ success = false;
+ goto exit_from_ping;
+
+success:
+ success = true;
+
+exit_from_ping:
+ /* Tell pong() to exit before leaving */
+ pthread_kill(t1_pong, SIGUSR1);
+ return NULL;
+}
+
+void *pong(void *not_used)
+{
+ while (!exit_from_pong)
+ /*
+ * Induce context switches on ping() thread
+ * until ping() finishes its job and signs
+ * to exit from this loop.
+ */
+ sched_yield();
+
+ return NULL;
+}
+
+int tm_trap_test(void)
+{
+ uint16_t k = 1;
+
+ int rc;
+
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ struct sigaction trap_sa;
+
+ trap_sa.sa_flags = SA_SIGINFO;
+ trap_sa.sa_sigaction = trap_signal_handler;
+ sigaction(SIGTRAP, &trap_sa, NULL);
+
+ struct sigaction usr1_sa;
+
+ usr1_sa.sa_flags = SA_SIGINFO;
+ usr1_sa.sa_sigaction = usr1_signal_handler;
+ sigaction(SIGUSR1, &usr1_sa, NULL);
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute */
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_error(rc, "pthread_attr_init()");
+
+ /*
+ * Bind thread ping() and pong() both to CPU 0 so they ping-pong and
+ * speed up context switches on ping() thread, speeding up the load_fp
+ * and load_vec overflow.
+ */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_error(rc, "pthread_attr_setaffinity()");
+
+ /* Figure out the machine endianness */
+ le = (int) *(uint8_t *)&k;
+
+ printf("%s machine detected. Checking if endianness flips %s",
+ le ? "Little-Endian" : "Big-Endian",
+ "inadvertently on trap in TM... ");
+
+ rc = fflush(0);
+ if (rc)
+ pr_error(rc, "fflush()");
+
+ /* Launch ping() */
+ rc = pthread_create(&t0_ping, &attr, ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ exit_from_pong = 0;
+
+ /* Launch pong() */
+ rc = pthread_create(&t1_pong, &attr, pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ rc = pthread_join(t0_ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ rc = pthread_join(t1_pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ if (success) {
+ printf("no.\n"); /* no, endianness did not flip inadvertently */
+ return EXIT_SUCCESS;
+ }
+
+ printf("yes!\n"); /* yes, endianness did flip inadvertently */
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(tm_trap_test, "tm_trap_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 96c37f84ce54..e6a0fad2bfd0 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -15,6 +15,7 @@
*/
#define _GNU_SOURCE
+#include <error.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -33,6 +34,11 @@
#define VSX_UNA_EXCEPTION 2
#define NUM_EXCEPTIONS 3
+#define err_at_line(status, errnum, format, ...) \
+ error_at_line(status, errnum, __FILE__, __LINE__, format ##__VA_ARGS__)
+
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
struct Flags {
int touch_fp;
@@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
* checking if the failure cause is the one we expect.
*/
do {
+ int rc;
+
/* Bind 'ping' to CPU 0, as specified in 'attr'. */
- pthread_create(&t0, attr, ping, (void *) &flags);
- pthread_setname_np(t0, "ping");
- pthread_join(t0, &ret_value);
+ rc = pthread_create(&t0, attr, ping, (void *) &flags);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+ rc = pthread_setname_np(t0, "ping");
+ if (rc)
+ pr_warn(rc, "pthread_setname_np");
+ rc = pthread_join(t0, &ret_value);
+ if (rc)
+ pr_err(rc, "pthread_join");
+
retries--;
} while (ret_value != NULL && retries);
@@ -320,7 +335,7 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
int main(int argc, char **argv)
{
- int exception; /* FP = 0, VEC = 1, VSX = 2 */
+ int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
@@ -330,13 +345,23 @@ int main(int argc, char **argv)
CPU_SET(0, &cpuset);
/* Init pthread attribute. */
- pthread_attr_init(&attr);
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_err(rc, "pthread_attr_init()");
/* Set CPU 0 mask into the pthread attribute. */
- pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
-
- pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
- pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_err(rc, "pthread_attr_setaffinity_np()");
+
+ rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+
+ /* Name it for systemtap convenience */
+ rc = pthread_setname_np(t1, "pong");
+ if (rc)
+ pr_warn(rc, "pthread_create()");
flags.result = 0;