diff options
53 files changed, 580 insertions, 304 deletions
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index b73a2519ecf2..5ae2ef2c12f3 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -207,6 +207,10 @@ HWCAP_FLAGM Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. +HWCAP2_FLAGM2 + + Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010. + HWCAP_SSBS Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. @@ -223,6 +227,10 @@ HWCAP_PACG ID_AA64ISAR1_EL1.GPI == 0b0001, as described by Documentation/arm64/pointer-authentication.txt. +HWCAP2_FRINT + + Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..089a834b0ed0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -260,7 +261,8 @@ config GENERIC_CALIBRATE_DELAY def_bool y config ZONE_DMA32 - def_bool y + bool "Support DMA32 zone" if EXPERT + default y config HAVE_GENERIC_GUP def_bool y @@ -933,7 +935,6 @@ config PARAVIRT config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT - default n help Select this option to enable fine granularity task steal time accounting. Time spent executing other tasks in parallel with @@ -1418,12 +1419,27 @@ config ARM64_SVE KVM in the same kernel image. config ARM64_MODULE_PLTS - bool + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES select HAVE_MOD_ARCH_SPECIFIC + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. + + When running with address space randomization (KASLR), the module + region itself may be too far away for ordinary relative jumps and + calls, and so in that case, module PLTs are required and cannot be + disabled. + + Specific errata workaround(s) might also force module PLTs to be + enabled (ARM64_ERRATUM_843419). config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com select CONFIG_ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of @@ -1436,6 +1452,17 @@ config ARM64_PSEUDO_NMI If unsure, say N +if ARM64_PSEUDO_NMI +config ARM64_DEBUG_PRIORITY_MASKING + bool "Debug interrupt priority masking" + help + This adds runtime checks to functions enabling/disabling + interrupts when using priority masking. The additional checks verify + the validity of ICC_PMR_EL1 when calling concerned functions. + + If unsure, say N +endif + config RELOCATABLE bool help diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d583514258c..a7cbf7cd84b4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -68,6 +68,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y +CONFIG_RANDOMIZE_BASE=y CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 14b41ddc68ba..9e991b628706 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -163,7 +163,9 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET)); + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 926434f413fa..d24b7c1ecd9b 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -91,12 +91,15 @@ static inline u32 cache_type_cwg(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -static inline int cache_line_size(void) +static inline int cache_line_size_of_cpu(void) { u32 cwg = cache_type_cwg(); + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +int cache_line_size(void); + /* * Read the effective value of CTR_EL0. * diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 19844211a4e6..b9ee5510067f 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -187,4 +187,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) int set_memory_valid(unsigned long addr, int numpages, int enable); +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bc895c869892..693a086e2148 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -617,6 +617,12 @@ static inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_has_prio_mask_debugging(void) +{ + return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && + system_uses_irq_prio_masking(); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index db452aa9e651..eca5bee1d85b 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -18,6 +18,7 @@ #include <linux/irqflags.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #define DAIF_PROCCTX 0 @@ -27,11 +28,20 @@ /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) { + WARN_ON(system_has_prio_mask_debugging() && + (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET))); + asm volatile( "msr daifset, #0xf // local_daif_mask\n" : : : "memory"); + + /* Don't really care for a dsb here, we don't intend to enable IRQs */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + trace_hardirqs_off(); } @@ -43,7 +53,7 @@ static inline unsigned long local_daif_save(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT; } @@ -56,39 +66,50 @@ static inline void local_daif_restore(unsigned long flags) { bool irq_disabled = flags & PSR_I_BIT; + WARN_ON(system_has_prio_mask_debugging() && + !(read_sysreg(daif) & PSR_I_BIT)); + if (!irq_disabled) { trace_hardirqs_on(); - if (system_uses_irq_prio_masking()) - arch_local_irq_enable(); - } else if (!(flags & PSR_A_BIT)) { - /* - * If interrupts are disabled but we can take - * asynchronous errors, we can take NMIs - */ if (system_uses_irq_prio_masking()) { - flags &= ~PSR_I_BIT; + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } + } else if (system_uses_irq_prio_masking()) { + u64 pmr; + + if (!(flags & PSR_A_BIT)) { /* - * There has been concern that the write to daif - * might be reordered before this write to PMR. - * From the ARM ARM DDI 0487D.a, section D1.7.1 - * "Accessing PSTATE fields": - * Writes to the PSTATE fields have side-effects on - * various aspects of the PE operation. All of these - * side-effects are guaranteed: - * - Not to be visible to earlier instructions in - * the execution stream. - * - To be visible to later instructions in the - * execution stream - * - * Also, writes to PMR are self-synchronizing, so no - * interrupts with a lower priority than PMR is signaled - * to the PE after the write. - * - * So we don't need additional synchronization here. + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs */ - arch_local_irq_disable(); + flags &= ~PSR_I_BIT; + pmr = GIC_PRIO_IRQOFF; + } else { + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; } + + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + gic_write_pmr(pmr); } write_sysreg(flags, daif); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index df62bbd33a9a..4154851c21ab 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -48,8 +48,6 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); -extern void fpsimd_save(void); - extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -63,8 +61,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_flush_cpu_state(void); -extern void sve_flush_cpu_state(void); +extern void fpsimd_save_and_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index b4bfb6672168..8371202e0a8b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -95,6 +95,8 @@ #define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) #define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) +#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) +#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 629963189085..cac2d2a3c24e 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -40,6 +40,12 @@ */ static inline void arch_local_irq_enable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", @@ -53,6 +59,12 @@ static inline void arch_local_irq_enable(void) static inline void arch_local_irq_disable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), @@ -67,43 +79,46 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { - unsigned long daif_bits; unsigned long flags; - daif_bits = read_sysreg(daif); - - /* - * The asm is logically equivalent to: - * - * if (system_uses_irq_prio_masking()) - * flags = (daif_bits & PSR_I_BIT) ? - * GIC_PRIO_IRQOFF : - * read_sysreg_s(SYS_ICC_PMR_EL1); - * else - * flags = daif_bits; - */ asm volatile(ALTERNATIVE( - "mov %0, %1\n" - "nop\n" - "nop", - __mrs_s("%0", SYS_ICC_PMR_EL1) - "ands %1, %1, " __stringify(PSR_I_BIT) "\n" - "csel %0, %0, %2, eq", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (flags), "+r" (daif_bits) - : "r" ((unsigned long) GIC_PRIO_IRQOFF) + "mrs %0, daif", + __mrs_s("%0", SYS_ICC_PMR_EL1), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags) + : : "memory"); return flags; } +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT), + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; flags = arch_local_save_flags(); - arch_local_irq_disable(); + /* + * There are too many states with IRQs disabled, just keep the current + * state if interrupts are already disabled/masked. + */ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_disable(); return flags; } @@ -119,26 +134,10 @@ static inline void arch_local_irq_restore(unsigned long flags) __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) - : "+r" (flags) : + : "r" (flags) : "memory"); } -static inline int arch_irqs_disabled_flags(unsigned long flags) -{ - int res; - - asm volatile(ALTERNATIVE( - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" - "nop", - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" - "cset %w0, ls", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (res) - : "r" ((int) flags) - : "memory"); - - return res; -} #endif #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4bcd9c1291d5..33410635b015 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -608,11 +608,12 @@ static inline void kvm_arm_vhe_guest_enter(void) * will not signal the CPU of interrupts of lower priority, and the * only way to get out will be via guest exceptions. * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + if (system_uses_irq_prio_masking()) dsb(sy); - } } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index a69259cc1f16..529e83f8e607 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -126,7 +126,6 @@ * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) -#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) @@ -153,8 +152,8 @@ /* * Level 3 descriptor (PTE). */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) -#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 986e41c4c32b..38c7148e2023 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -24,7 +24,6 @@ /* * Software defined PTE bits definition. */ -#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2c41b04708fe..2023eab19d73 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -246,29 +246,42 @@ extern void __sync_icache_dcache(pte_t pteval); * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + +static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, + pte_t pte) { pte_t old_pte; - if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte); + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return; + + old_pte = READ_ONCE(*ptep); + + if (!pte_valid(old_pte) || !pte_valid(pte)) + return; + if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) + return; /* - * If the existing pte is valid, check for potential race with - * hardware updates of the pte (ptep_set_access_flags safely changes - * valid ptes without going through an invalid entry). + * Check for potential race with hardware updates of the pte + * (ptep_set_access_flags safely changes valid ptes without going + * through an invalid entry). */ - old_pte = READ_ONCE(*ptep); - if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && - (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { - VM_WARN_ONCE(!pte_young(pte), - "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), - "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - } + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte); + + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); } @@ -335,9 +348,14 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } -static inline pgprot_t mk_sect_prot(pgprot_t prot) +static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); +} + +static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); + return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); } #ifdef CONFIG_NUMA_BALANCING diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index b2de32939ada..da2242248466 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,9 +35,15 @@ * means masking more IRQs (or at least that the same IRQs remain masked). * * To mask interrupts, we clear the most significant bit of PMR. + * + * Some code sections either automatically switch back to PSR.I or explicitly + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included + * in the the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. */ -#define GIC_PRIO_IRQON 0xf0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 6495cc51246f..a6307e43b8c2 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -15,9 +15,9 @@ #include <linux/preempt.h> #include <linux/types.h> -#ifdef CONFIG_KERNEL_MODE_NEON +DECLARE_PER_CPU(bool, fpsimd_context_busy); -DECLARE_PER_CPU(bool, kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -29,15 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * kernel_neon_busy is only set while preemption is disabled, + * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy * cannot change under our feet -- if it's set we cannot be * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(kernel_neon_busy); + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 902d75b60914..601972771807 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -560,6 +560,7 @@ /* id_aa64isar1 */ #define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 #define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_LRCPC_SHIFT 20 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index eb3ef73e07cf..c285d1ce7186 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -75,6 +75,7 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SYSCALL_TRACE - syscall trace active * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace * TIF_SYSCALL_AUDIT - syscall auditing + * TIF_SYSCALL_EMU - syscall emulation active * TIF_SECOMP - syscall secure computing * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary @@ -91,6 +92,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP 11 +#define TIF_SYSCALL_EMU 12 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -109,6 +111,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) #define _TIF_32BIT (1 << TIF_32BIT) @@ -120,7 +123,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_NOHZ) + _TIF_NOHZ | _TIF_SYSCALL_EMU) #define INIT_THREAD_INFO(tsk) \ { \ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1a772b162191..a1e72886b30c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -63,5 +63,7 @@ #define HWCAP2_SVEBITPERM (1 << 4) #define HWCAP2_SVESHA3 (1 << 5) #define HWCAP2_SVESM4 (1 << 6) +#define HWCAP2_FLAGM2 (1 << 7) +#define HWCAP2_FRINT (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d78623acb649..627ac57c1581 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -62,6 +62,9 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* syscall emulation path in ptrace */ +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 803f0494dd3e..7722e85fb69c 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -155,10 +155,14 @@ static int __init acpi_fadt_sanity_check(void) */ if (table->revision < 5 || (table->revision == 5 && fadt->minor_revision < 1)) { - pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n", + pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n", table->revision, fadt->minor_revision); - ret = -EINVAL; - goto out; + + if (!fadt->arm_boot_flags) { + ret = -EINVAL; + goto out; + } + pr_err("FADT has ARM boot flags set, assuming 5.1\n"); } if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 0bf0a835122f..969fcc3be556 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -28,6 +28,15 @@ #define CLIDR_CTYPE(clidr, level) \ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) +int cache_line_size(void) +{ + if (coherency_max_size != 0) + return coherency_max_size; + + return cache_line_size_of_cpu(); +} +EXPORT_SYMBOL_GPL(cache_line_size); + static inline enum cache_type get_cache_type(int level) { u64 clidr; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ca27e08e3d8a..a0f00917b8b9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1194,14 +1194,14 @@ static struct undef_hook ssbs_emulation_hook = { static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) { static bool undef_hook_registered = false; - static DEFINE_SPINLOCK(hook_lock); + static DEFINE_RAW_SPINLOCK(hook_lock); - spin_lock(&hook_lock); + raw_spin_lock(&hook_lock); if (!undef_hook_registered) { register_undef_hook(&ssbs_emulation_hook); undef_hook_registered = true; } - spin_unlock(&hook_lock); + raw_spin_unlock(&hook_lock); if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); @@ -1628,6 +1628,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1639,6 +1640,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f6f7936be6e7..fda8ded8b739 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -92,6 +92,8 @@ static const char *const hwcap_str[] = { "svebitperm", "svesha3", "svesm4", + "flagm2", + "frint", NULL }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index cd0c7af8e4a8..165da78815c5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -258,6 +258,7 @@ alternative_else_nop_endif /* * Registers that may be useful after this macro is invoked: * + * x20 - ICC_PMR_EL1 * x21 - aborted SP * x22 - aborted PC * x23 - aborted PSTATE @@ -435,6 +436,38 @@ tsk .req x28 // current thread_info irq_stack_exit .endm +#ifdef CONFIG_ARM64_PSEUDO_NMI + /* + * Set res to 0 if irqs were unmasked in interrupted context. + * Otherwise set res to non-0 value. + */ + .macro test_irqs_unmasked res:req, pmr:req +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + sub \res, \pmr, #GIC_PRIO_IRQON +alternative_else + mov \res, xzr +alternative_endif + .endm +#endif + + .macro gic_prio_kentry_setup, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + + .macro gic_prio_irq_setup, pmr:req, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + .text /* @@ -613,6 +646,7 @@ el1_dbg: cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception @@ -630,20 +664,18 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 + gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] -alternative_else - mov x20, #GIC_PRIO_IRQON -alternative_endif - cmp x20, #GIC_PRIO_IRQOFF - /* Irqs were disabled, don't trace */ - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_enter +1: #endif + +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off -1: #endif irq_handler @@ -662,14 +694,23 @@ alternative_else_nop_endif bl preempt_schedule_irq // irq en/disable is done inside 1: #endif -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI /* - * if IRQs were disabled when we received the interrupt, we have an NMI - * and we are not re-enabling interrupt upon eret. Skip tracing. + * When using IRQ priority masking, we can get spurious interrupts while + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a + * section with interrupts disabled. Skip tracing in those cases. */ - cmp x20, #GIC_PRIO_IRQOFF - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_exit +1: +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + test_irqs_unmasked res=x0, pmr=x20 + cbnz x0, 1f #endif bl trace_hardirqs_on 1: @@ -787,6 +828,7 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -832,6 +874,7 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -866,11 +909,12 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x1, x25 mov x2, sp bl do_debug_exception - enable_daif + enable_da_f ct_user_exit b ret_to_user el0_inv: @@ -887,7 +931,9 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: + gic_prio_irq_setup pmr=x20, tmp=x0 enable_da_f + #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -909,6 +955,7 @@ ENDPROC(el0_irq) el1_error: kernel_entry 1 mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror @@ -919,10 +966,11 @@ el0_error: kernel_entry 0 el0_error_naked: mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror - enable_daif + enable_da_f ct_user_exit b ret_to_user ENDPROC(el0_error) @@ -943,6 +991,7 @@ work_pending: */ ret_to_user: disable_daif + gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -959,6 +1008,7 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_handler b ret_to_user diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a38bf74bcca8..c7c454df2779 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -92,7 +92,8 @@ * To prevent this from racing with the manipulation of the task's FPSIMD state * from task context and thereby corrupting the state, it is necessary to * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with local_bh_disable() unless softirqs are already masked. + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to + * run but prevent them to use FPSIMD. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -155,6 +156,56 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +DEFINE_PER_CPU(bool, fpsimd_context_busy); +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); + +static void __get_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); + + WARN_ON(busy); +} + +/* + * Claim ownership of the CPU FPSIMD context for use by the calling context. + * + * The caller may freely manipulate the FPSIMD context metadata until + * put_cpu_fpsimd_context() is called. + * + * The double-underscore version must only be called if you know the task + * can't be preempted. + */ +static void get_cpu_fpsimd_context(void) +{ + preempt_disable(); + __get_cpu_fpsimd_context(); +} + +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +/* + * Release the CPU FPSIMD context. + * + * Must be called from a context in which get_cpu_fpsimd_context() was + * previously called, with no call to put_cpu_fpsimd_context() in the + * meantime. + */ +static void put_cpu_fpsimd_context(void) +{ + __put_cpu_fpsimd_context(); + preempt_enable(); +} + +static bool have_cpu_fpsimd_context(void) +{ + return !preemptible() && __this_cpu_read(fpsimd_context_busy); +} + /* * Call __sve_free() directly only if you know task can't be scheduled * or preempted. @@ -225,12 +276,10 @@ static void sve_free(struct task_struct *task) * This function should be called only when the FPSIMD/SVE state in * thread_struct is known to be up to date, when preparing to enter * userspace. - * - * Softirqs (and preemption) must be disabled. */ static void task_fpsimd_load(void) { - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -243,16 +292,14 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. - * - * Softirqs (and preemption) must be disabled. */ -void fpsimd_save(void) +static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (system_supports_sve() && test_thread_flag(TIF_SVE)) { @@ -357,7 +404,8 @@ static int __init sve_sysctl_init(void) { return 0; } * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must be up to date before calling this @@ -384,7 +432,8 @@ static void fpsimd_to_sve(struct task_struct *task) * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. @@ -544,7 +593,7 @@ int sve_set_vector_length(struct task_struct *task, * non-SVE thread. */ if (task == current) { - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); } @@ -554,7 +603,7 @@ int sve_set_vector_length(struct task_struct *task, sve_to_fpsimd(task); if (task == current) - local_bh_enable(); + put_cpu_fpsimd_context(); /* * Force reallocation of task SVE state to the correct size @@ -867,7 +916,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) sve_alloc(current); - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); @@ -878,7 +927,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -922,6 +971,8 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; + __get_cpu_fpsimd_context(); + /* Save unsaved fpsimd state, if any: */ fpsimd_save(); @@ -936,6 +987,8 @@ void fpsimd_thread_switch(struct task_struct *next) update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, wrong_task || wrong_cpu); + + __put_cpu_fpsimd_context(); } void fpsimd_flush_thread(void) @@ -945,7 +998,7 @@ void fpsimd_flush_thread(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -986,7 +1039,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -998,9 +1051,9 @@ void fpsimd_preserve_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1017,7 +1070,8 @@ void fpsimd_signal_preserve_current_state(void) /* * Associate current's FPSIMD context with this cpu - * Preemption must be disabled when calling this function. + * The caller must have ownership of the cpu FPSIMD context before calling + * this function. */ void fpsimd_bind_task_to_cpu(void) { @@ -1063,14 +1117,14 @@ void fpsimd_restore_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1083,7 +1137,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -1094,7 +1148,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) clear_thread_flag(TIF_FOREIGN_FPSTATE); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1120,18 +1174,29 @@ void fpsimd_flush_task_state(struct task_struct *t) /* * Invalidate any task's FPSIMD state that is present on this cpu. - * This function must be called with softirqs disabled. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. */ -void fpsimd_flush_cpu_state(void) +static void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the FPSIMD state to memory and invalidate cpu view. + * This function must be called with preemption disabled. + */ +void fpsimd_save_and_flush_cpu_state(void) +{ + WARN_ON(preemptible()); + __get_cpu_fpsimd_context(); + fpsimd_save(); + fpsimd_flush_cpu_state(); + __put_cpu_fpsimd_context(); +} -DEFINE_PER_CPU(bool, kernel_neon_busy); -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -1157,19 +1222,13 @@ void kernel_neon_begin(void) BUG_ON(!may_use_simd()); - local_bh_disable(); - - __this_cpu_write(kernel_neon_busy, true); + get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); - - preempt_disable(); - - local_bh_enable(); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1184,15 +1243,10 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { - bool busy; - if (!system_supports_fpsimd()) return; - busy = __this_cpu_xchg(kernel_neon_busy, false); - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ - - preempt_enable(); + put_cpu_fpsimd_context(); } EXPORT_SYMBOL(kernel_neon_end); @@ -1284,8 +1338,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); break; case CPU_PM_EXIT: break; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 92fa81798fb9..e8daa7aa77bc 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -27,8 +27,10 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/kprobes.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <asm/daifflags.h> #include <asm/vmap_stack.h> unsigned long irq_err_count; @@ -75,4 +77,28 @@ void __init init_IRQ(void) irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); + + if (system_uses_irq_prio_masking()) { + /* + * Now that we have a stack for our IRQ handler, set + * the PMR/PSR pair to a consistent state. + */ + WARN_ON(read_sysreg(daif) & PSR_A_BIT); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + } +} + +/* + * Stubs to make nmi_enter/exit() code callable from ASM + */ +asmlinkage void notrace asm_nmi_enter(void) +{ + nmi_enter(); +} +NOKPROBE_SYMBOL(asm_nmi_enter); + +asmlinkage void notrace asm_nmi_exit(void) +{ + nmi_exit(); } +NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index dd080837e6a9..5b5936b7868c 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -41,7 +41,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + MODULES_VSIZE, - gfp_mask, PAGE_KERNEL_EXEC, 0, + gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -57,7 +57,7 @@ void *module_alloc(unsigned long size) */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 88ce502c8e6f..bd5dfffca272 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -122,8 +122,10 @@ void *alloc_insn_page(void) void *page; page = vmalloc_exec(PAGE_SIZE); - if (page) + if (page) { set_memory_ro((unsigned long)page, 1); + set_vm_flush_reset_perms(page); + } return page; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 3767fb21a5b8..58efc3727778 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -94,7 +94,7 @@ static void __cpu_do_idle_irqprio(void) * be raised. */ pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); __cpu_do_idle(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index b82e0a9b3da3..9353355cb91a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1819,8 +1819,12 @@ static void tracehook_report_syscall(struct pt_regs *regs, int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) || + test_thread_flag(TIF_SYSCALL_EMU)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + return -1; + } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing(NULL) == -1) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 3e53ffa07994..f5b04dd8a710 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -27,7 +27,7 @@ * aff0 = mpidr_masked & 0xff; * aff1 = mpidr_masked & 0xff00; * aff2 = mpidr_masked & 0xff0000; - * aff2 = mpidr_masked & 0xff00000000; + * aff3 = mpidr_masked & 0xff00000000; * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); *} * Input registers: rs0, rs1, rs2, rs3, mpidr, mask diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index bb4b3f07761a..434d6714358b 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -192,11 +192,7 @@ static void init_gic_priority_masking(void) WARN_ON(!(cpuflags & PSR_I_BIT)); - gic_write_pmr(GIC_PRIO_IRQOFF); - - /* We can only unmask PSR.I if we can take aborts */ - if (!(cpuflags & PSR_A_BIT)) - write_sysreg(cpuflags & ~PSR_I_BIT, daif); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } /* @@ -845,18 +841,23 @@ void arch_irq_work_raise(void) } #endif -/* - * ipi_cpu_stop - handle IPI from smp_send_stop() - */ -static void ipi_cpu_stop(unsigned int cpu) +static void local_cpu_stop(void) { - set_cpu_online(cpu, false); + set_cpu_online(smp_processor_id(), false); local_daif_mask(); sdei_mask_local_cpu(); + cpu_park_loop(); +} - while (1) - cpu_relax(); +/* + * We need to implement panic_smp_self_stop() for parallel panic() calls, so + * that cpu_online_mask gets correctly updated and smp_send_stop() can skip + * CPUs that have already stopped themselves. + */ +void panic_smp_self_stop(void) +{ + local_cpu_stop(); } #ifdef CONFIG_KEXEC_CORE @@ -909,7 +910,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_CPU_STOP: irq_enter(); - ipi_cpu_stop(cpu); + local_cpu_stop(); irq_exit(); break; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 177c0f6ebabf..8a395523adcf 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -66,16 +66,19 @@ static void dump_backtrace_entry(unsigned long where) printk(" %pS\n", (void *)where); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; + if (user_mode(regs)) + return; + for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = get_user(val, &((u32 *)addr)[i]); + bad = aarch64_insn_read(&((u32 *)addr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -84,19 +87,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) break; } } - printk("%sCode: %s\n", lvl, str); -} -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - if (!user_mode(regs)) { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } + printk("%sCode: %s\n", lvl, str); } void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -182,8 +174,7 @@ static int __die(const char *str, int err, struct pt_regs *regs) print_modules(); show_regs(regs); - if (!user_mode(regs)) - dump_instr(KERN_EMERG, regs); + dump_kernel_instr(KERN_EMERG, regs); return ret; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 6e3c9c8b2df9..525010504f9d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - /* Clean guest FP state to memory and invalidate cpu view */ - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 8799e0c267d4..b89fcf0173b7 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -615,7 +615,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) * Naturally, we want to avoid this. */ if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); dsb(sy); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 674860e3e478..ff410195dc1c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -91,10 +91,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, static int __init arm64_dma_init(void) { - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size()); return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -472,6 +468,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + int cls = cache_line_size_of_cpu(); + + WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, cls); + dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a30818ed9c60..582061dec89f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -395,40 +395,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags, - struct task_struct *tsk) + unsigned int mm_flags, unsigned long vm_flags) { - struct vm_area_struct *vma; - vm_fault_t fault; + struct vm_area_struct *vma = find_vma(mm, addr); - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + return VM_FAULT_BADMAP; /* * Ok, we have a good vm_area for this memory access, so we can handle * it. */ -good_area: + if (unlikely(vma->vm_start > addr)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + return VM_FAULT_BADMAP; + if (expand_stack(vma, addr)) + return VM_FAULT_BADMAP; + } + /* * Check that the permissions on the VMA allow for the fault which * occurred. */ - if (!(vma->vm_flags & vm_flags)) { - fault = VM_FAULT_BADACCESS; - goto out; - } - + if (!(vma->vm_flags & vm_flags)) + return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); - -check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; } static bool is_el0_instruction_abort(unsigned int esr) @@ -436,12 +427,20 @@ static bool is_el0_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } +/* + * Note: not valid for EL1 DC IVAC, but we never use that such that it + * should fault. EL0 cannot issue DC IVAC (undef). + */ +static bool is_write_abort(unsigned int esr) +{ + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf; - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -449,9 +448,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (notify_page_fault(regs, esr)) return 0; - tsk = current; - mm = tsk->mm; - /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -464,7 +460,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + mm_flags |= FAULT_FLAG_INSTRUCTION; + } else if (is_write_abort(esr)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } @@ -503,12 +500,14 @@ retry: */ might_sleep(); #ifdef CONFIG_DEBUG_VM - if (!user_mode(regs) && !search_exception_tables(regs->pc)) + if (!user_mode(regs) && !search_exception_tables(regs->pc)) { + up_read(&mm->mmap_sem); goto no_context; + } #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags); major |= fault & VM_FAULT_MAJOR; if (fault & VM_FAULT_RETRY) { @@ -548,11 +547,11 @@ retry: * that point. */ if (major) { - tsk->maj_flt++; + current->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { - tsk->min_flt++; + current->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f475e54fbc43..bbeb6a5a6ba6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (sz == PUD_SIZE) { ptep = (pte_t *)pudp; - } else if (sz == (PAGE_SIZE * CONT_PTES)) { + } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); @@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = huge_pmd_share(mm, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); - } else if (sz == (PMD_SIZE * CONT_PMDS)) { + } else if (sz == (CONT_PMD_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); return (pte_t *)pmdp; @@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void) #ifdef CONFIG_ARM64_4K_PAGES add_huge_page_size(PUD_SIZE); #endif - add_huge_page_size(PMD_SIZE * CONT_PMDS); + add_huge_page_size(CONT_PMD_SIZE); add_huge_page_size(PMD_SIZE); - add_huge_page_size(PAGE_SIZE * CONT_PTES); + add_huge_page_size(CONT_PTE_SIZE); return 0; } @@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt) #ifdef CONFIG_ARM64_4K_PAGES case PUD_SIZE: #endif - case PMD_SIZE * CONT_PMDS: + case CONT_PMD_SIZE: case PMD_SIZE: - case PAGE_SIZE * CONT_PTES: + case CONT_PTE_SIZE: add_huge_page_size(ps); return 1; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d2adffb81b5d..f643bd45ff69 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -191,8 +191,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#endif max_zone_pfns[ZONE_NORMAL] = max; free_area_init_nodes(max_zone_pfns); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a1bfc4413982..21f3931c73fd 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -776,7 +776,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { @@ -971,32 +971,28 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), pud_val(new_pud))) return 0; - BUG_ON(phys & ~PUD_MASK); + VM_BUG_ON(phys & ~PUD_MASK); set_pud(pudp, new_pud); return 1; } int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), pmd_val(new_pmd))) return 0; - BUG_ON(phys & ~PMD_MASK); + VM_BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 6cd645edcf35..9c6b9039ec8f 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -159,17 +159,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable) __pgprot(PTE_VALID)); } -#ifdef CONFIG_DEBUG_PAGEALLOC +int set_direct_map_invalid_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(PTE_VALID), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + +int set_direct_map_default_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(PTE_VALID | PTE_WRITE), + .clear_mask = __pgprot(PTE_RDONLY), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { + if (!debug_pagealloc_enabled() && !rodata_full) + return; + set_memory_valid((unsigned long)page_address(page), numpages, enable); } -#ifdef CONFIG_HIBERNATION + /* - * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function - * is used to determine if a linear map page has been marked as not-valid by - * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. - * This is based on kern_addr_valid(), which almost does what we need. + * This function is used to determine if a linear map page has been marked as + * not-valid. Walk the page table and check the PTE_VALID bit. This is based + * on kern_addr_valid(), which almost does what we need. * * Because this is only called on the kernel linear map, p?d_sect() implies * p?d_present(). When debug_pagealloc is enabled, sections mappings are @@ -183,6 +214,9 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); + if (!debug_pagealloc_enabled() && !rodata_full) + return true; + pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; @@ -204,5 +238,3 @@ bool kernel_page_present(struct page *page) ptep = pte_offset_kernel(pmdp, addr); return pte_valid(READ_ONCE(*ptep)); } -#endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index df845cee438e..aef4ff467222 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -981,7 +981,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 684b0b315c32..8c92febf5f44 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ user_disable_single_step(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a986b3c8294c..0a61705d62ec 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); unsigned long ret = 0; - bool emulated = false; u32 work; if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags); - if (unlikely(work & _TIF_SYSCALL_EMU)) - emulated = true; - - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - return -1L; - - if (emulated) - return -1L; + if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { + ret = tracehook_report_syscall_entry(regs); + if (ret || (work & _TIF_SYSCALL_EMU)) + return -1L; + } #ifdef CONFIG_SECCOMP /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..36998e0c3fc4 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -747,9 +747,6 @@ static int ioperm_get(struct task_struct *target, void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index a7359535caf5..8827c60f51e2 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -213,6 +213,8 @@ int __weak cache_setup_acpi(unsigned int cpu) return -ENOTSUPP; } +unsigned int coherency_max_size; + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -251,6 +253,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) cpumask_set_cpu(i, &this_leaf->shared_cpu_map); } } + /* record the maximum cache line size */ + if (this_leaf->coherency_line_size > coherency_max_size) + coherency_max_size = this_leaf->coherency_line_size; } return 0; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index f44cd89cfc40..b176700bb387 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -472,8 +472,12 @@ static void gic_deactivate_unhandled(u32 irqnr) static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { + bool irqs_enabled = interrupts_enabled(regs); int err; + if (irqs_enabled) + nmi_enter(); + if (static_branch_likely(&supports_deactivate_key)) gic_write_eoir(irqnr); /* @@ -485,6 +489,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) err = handle_domain_nmi(gic_data.domain, irqnr, regs); if (err) gic_deactivate_unhandled(irqnr); + + if (irqs_enabled) + nmi_exit(); } static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 70e19bc6cc9f..46b92cd61d0c 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -17,6 +17,8 @@ enum cache_type { CACHE_TYPE_UNIFIED = BIT(2), }; +extern unsigned int coherency_max_size; + /** * struct cacheinfo - represent a cache leaf node * @id: This cache's id. It is unique among caches with the same (type, level). diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index c52b737ab8e3..a92b33593b8d 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, * @hwirq: The HW irq number to convert to a logical one * @regs: Register file coming from the low-level handling code * + * This function must be called from an NMI context. + * * Returns: 0 on success, or -EINVAL if conversion has failed */ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, @@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, unsigned int irq; int ret = 0; - nmi_enter(); + /* + * NMI context needs to be setup earlier in order to deal with tracing. + */ + WARN_ON(!in_nmi()); irq = irq_find_mapping(domain, hwirq); @@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, else ret = -EINVAL; - nmi_exit(); set_irq_regs(old_regs); return ret; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5710d07e67cf..ab14654b2436 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -118,6 +118,9 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif child->parent = child->real_parent; list_del_init(&child->ptrace_entry); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7350a124524b..6bd7b515995c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2128,17 +2128,6 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) int flush_reset = area->flags & VM_FLUSH_RESET_PERMS; int i; - /* - * The below block can be removed when all architectures that have - * direct map permissions also have set_direct_map_() implementations. - * This is concerned with resetting the direct map any an vm alias with - * execute permissions, without leaving a RW+X window. - */ - if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { - set_memory_nx(addr, area->nr_pages); - set_memory_rw(addr, area->nr_pages); - } - remove_vm_area(area->addr); /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ |