summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c1
-rw-r--r--kernel/audit.c19
-rw-r--r--kernel/audit.h2
-rw-r--r--kernel/audit_fsnotify.c2
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cpu.c64
-rw-r--r--kernel/cred.c4
-rw-r--r--kernel/debug/kdb/kdb_main.c4
-rw-r--r--kernel/delayacct.c2
-rw-r--r--kernel/events/core.c6
-rw-r--r--kernel/events/uprobes.c13
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c27
-rw-r--r--kernel/futex.c85
-rw-r--r--kernel/futex_compat.c2
-rw-r--r--kernel/gcov/base.c7
-rw-r--r--kernel/irq/handle.c5
-rw-r--r--kernel/irq/irqdomain.c12
-rw-r--r--kernel/irq/msi.c8
-rw-r--r--kernel/kcmp.c4
-rw-r--r--kernel/kexec.c10
-rw-r--r--kernel/kexec_core.c7
-rw-r--r--kernel/kexec_file.c2
-rw-r--r--kernel/kexec_internal.h21
-rw-r--r--kernel/livepatch/core.c176
-rw-r--r--kernel/locking/rtmutex.c135
-rw-r--r--kernel/memremap.c219
-rw-r--r--kernel/module.c349
-rw-r--r--kernel/panic.c3
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/printk/printk.c77
-rw-r--r--kernel/ptrace.c49
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/idle.c9
-rw-r--r--kernel/seccomp.c22
-rw-r--r--kernel/stop_machine.c4
-rw-r--r--kernel/sys.c20
-rw-r--r--kernel/sysctl.c38
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/trace/trace.c2
43 files changed, 823 insertions, 614 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 4c3773c0bf63..d2edd6efec56 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -326,3 +326,4 @@ bool current_is_async(void)
return worker && worker->current_func == async_run_entry_fn;
}
+EXPORT_SYMBOL_GPL(current_is_async);
diff --git a/kernel/audit.c b/kernel/audit.c
index 5ffcbd354a52..3a3e5deeda8d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -110,7 +110,6 @@ static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
-static u32 audit_backlog_wait_overflow = 0;
/* The identity of the user shutting down the audit system. */
kuid_t audit_sig_uid = INVALID_UID;
@@ -509,8 +508,7 @@ static void flush_hold_queue(void)
* if auditd just disappeared but we
* dequeued an skb we need to drop ref
*/
- if (skb)
- consume_skb(skb);
+ consume_skb(skb);
}
static int kauditd_thread(void *dummy)
@@ -524,7 +522,8 @@ static int kauditd_thread(void *dummy)
skb = skb_dequeue(&audit_skb_queue);
if (skb) {
- if (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit)
+ if (!audit_backlog_limit ||
+ (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
wake_up(&audit_backlog_wait);
if (audit_pid)
kauditd_send_skb(skb);
@@ -1232,9 +1231,7 @@ static void audit_buffer_free(struct audit_buffer *ab)
if (!ab)
return;
- if (ab->skb)
- kfree_skb(ab->skb);
-
+ kfree_skb(ab->skb);
spin_lock_irqsave(&audit_freelist_lock, flags);
if (audit_freelist_count > AUDIT_MAXFREE)
kfree(ab);
@@ -1372,7 +1369,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
return NULL;
if (gfp_mask & __GFP_DIRECT_RECLAIM) {
- if (audit_pid && audit_pid == current->pid)
+ if (audit_pid && audit_pid == current->tgid)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
else
reserve = 0;
@@ -1395,12 +1392,12 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
skb_queue_len(&audit_skb_queue),
audit_backlog_limit);
audit_log_lost("backlog limit exceeded");
- audit_backlog_wait_time = audit_backlog_wait_overflow;
+ audit_backlog_wait_time = 0;
wake_up(&audit_backlog_wait);
return NULL;
}
- if (!reserve)
+ if (!reserve && !audit_backlog_wait_time)
audit_backlog_wait_time = audit_backlog_wait_time_master;
ab = audit_buffer_alloc(ctx, gfp_mask, type);
@@ -1722,7 +1719,7 @@ static inline int audit_copy_fcaps(struct audit_names *name,
/* Copy inode data into an audit_names. */
void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
- const struct inode *inode)
+ struct inode *inode)
{
name->ino = inode->i_ino;
name->dev = inode->i_sb->s_dev;
diff --git a/kernel/audit.h b/kernel/audit.h
index de6cbb7cf547..cbbe6bb6496e 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -207,7 +207,7 @@ extern u32 audit_ever_enabled;
extern void audit_copy_inode(struct audit_names *name,
const struct dentry *dentry,
- const struct inode *inode);
+ struct inode *inode);
extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
kernel_cap_t *cap);
extern void audit_log_name(struct audit_context *context,
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 27c6046c2c3d..f84f8d06e1f6 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -95,7 +95,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
if (IS_ERR(dentry))
return (void *)dentry; /* returning an error */
inode = path.dentry->d_inode;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
if (unlikely(!audit_mark)) {
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 656c7e93ac0d..9f194aad0adc 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -364,7 +364,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+ inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d_backing_inode(d)->i_sb->s_dev;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b86cc04959de..195ffaee50b9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
unsigned int flags)
{
struct audit_context *context = current->audit_context;
- const struct inode *inode = d_backing_inode(dentry);
+ struct inode *inode = d_backing_inode(dentry);
struct audit_names *n;
bool parent = flags & AUDIT_INODE_PARENT;
@@ -1848,12 +1848,12 @@ void __audit_file(const struct file *file)
* must be hooked prior, in order to capture the target inode during
* unsuccessful attempts.
*/
-void __audit_inode_child(const struct inode *parent,
+void __audit_inode_child(struct inode *parent,
const struct dentry *dentry,
const unsigned char type)
{
struct audit_context *context = current->audit_context;
- const struct inode *inode = d_backing_inode(dentry);
+ struct inode *inode = d_backing_inode(dentry);
const char *dname = dentry->d_name.name;
struct audit_names *n, *found_parent = NULL, *found_child = NULL;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 85ff5e26e23b..5b9d39633ce9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -759,71 +759,33 @@ const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
-static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
- = CPU_BITS_ALL;
+struct cpumask __cpu_possible_mask __read_mostly
+ = {CPU_BITS_ALL};
#else
-static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
+struct cpumask __cpu_possible_mask __read_mostly;
#endif
-const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
-EXPORT_SYMBOL(cpu_possible_mask);
+EXPORT_SYMBOL(__cpu_possible_mask);
-static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
-const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
-EXPORT_SYMBOL(cpu_online_mask);
+struct cpumask __cpu_online_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_online_mask);
-static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
-const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
-EXPORT_SYMBOL(cpu_present_mask);
+struct cpumask __cpu_present_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_present_mask);
-static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
-const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
-EXPORT_SYMBOL(cpu_active_mask);
-
-void set_cpu_possible(unsigned int cpu, bool possible)
-{
- if (possible)
- cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
- else
- cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
-}
-
-void set_cpu_present(unsigned int cpu, bool present)
-{
- if (present)
- cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
- else
- cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
-}
-
-void set_cpu_online(unsigned int cpu, bool online)
-{
- if (online) {
- cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
- cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
- } else {
- cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
- }
-}
-
-void set_cpu_active(unsigned int cpu, bool active)
-{
- if (active)
- cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
- else
- cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
-}
+struct cpumask __cpu_active_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_active_mask);
void init_cpu_present(const struct cpumask *src)
{
- cpumask_copy(to_cpumask(cpu_present_bits), src);
+ cpumask_copy(&__cpu_present_mask, src);
}
void init_cpu_possible(const struct cpumask *src)
{
- cpumask_copy(to_cpumask(cpu_possible_bits), src);
+ cpumask_copy(&__cpu_possible_mask, src);
}
void init_cpu_online(const struct cpumask *src)
{
- cpumask_copy(to_cpumask(cpu_online_bits), src);
+ cpumask_copy(&__cpu_online_mask, src);
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 71179a09c1d6..0c0cd8a62285 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -569,8 +569,8 @@ EXPORT_SYMBOL(revert_creds);
void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
- cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
}
/**
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4121345498e0..2a20c0dfdafc 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2021,7 +2021,7 @@ static int kdb_lsmod(int argc, const char **argv)
continue;
kdb_printf("%-20s%8u 0x%p ", mod->name,
- mod->core_size, (void *)mod);
+ mod->core_layout.size, (void *)mod);
#ifdef CONFIG_MODULE_UNLOAD
kdb_printf("%4d ", module_refcount(mod));
#endif
@@ -2031,7 +2031,7 @@ static int kdb_lsmod(int argc, const char **argv)
kdb_printf(" (Loading)");
else
kdb_printf(" (Live)");
- kdb_printf(" 0x%p", mod->module_core);
+ kdb_printf(" 0x%p", mod->core_layout.base);
#ifdef CONFIG_MODULE_UNLOAD
{
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ef90b04d783f..435c14a45118 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -34,7 +34,7 @@ __setup("nodelayacct", delayacct_setup_disable);
void delayacct_init(void)
{
- delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
+ delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
delayacct_tsk_init(&init_task);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f1e53e8d4ae2..5946460b2425 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3368,7 +3368,7 @@ find_lively_task_by_vpid(pid_t vpid)
/* Reuse ptrace permission checks for now. */
err = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
goto errout;
return task;
@@ -4862,9 +4862,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
struct perf_event *event = filp->private_data;
int retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fasync_helper(fd, filp, on, &event->fasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval < 0)
return retval;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 7dad84913abf..0167679182c0 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -161,7 +161,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
const unsigned long mmun_end = addr + PAGE_SIZE;
struct mem_cgroup *memcg;
- err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+ err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+ false);
if (err)
return err;
@@ -175,12 +176,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
goto unlock;
get_page(kpage);
- page_add_new_anon_rmap(kpage, vma, addr);
- mem_cgroup_commit_charge(kpage, memcg, false);
+ page_add_new_anon_rmap(kpage, vma, addr, false);
+ mem_cgroup_commit_charge(kpage, memcg, false, false);
lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) {
- dec_mm_counter(mm, MM_FILEPAGES);
+ dec_mm_counter(mm, mm_counter_file(page));
inc_mm_counter(mm, MM_ANONPAGES);
}
@@ -188,7 +189,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
- page_remove_rmap(page);
+ page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
pte_unmap_unlock(ptep, ptl);
@@ -199,7 +200,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
err = 0;
unlock:
- mem_cgroup_cancel_charge(kpage, memcg);
+ mem_cgroup_cancel_charge(kpage, memcg, false);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
unlock_page(page);
return err;
diff --git a/kernel/exit.c b/kernel/exit.c
index 07110c6020a0..10e088237fed 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,8 +59,6 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
-static void exit_mm(struct task_struct *tsk);
-
static void __unhash_process(struct task_struct *p, bool group_dead)
{
nr_threads--;
@@ -1120,8 +1118,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
if (ptrace) {
- if (task_is_stopped_or_traced(p) &&
- !(p->jobctl & JOBCTL_LISTENING))
+ if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
return &p->exit_code;
} else {
if (p->signal->flags & SIGNAL_STOP_STOPPED)
diff --git a/kernel/fork.c b/kernel/fork.c
index 6774e6b2e96d..2e391c754ae7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,9 +300,9 @@ void __init fork_init(void)
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
/* create a slab on which task_structs can be allocated */
- task_struct_cachep =
- kmem_cache_create("task_struct", arch_task_struct_size,
- ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
+ task_struct_cachep = kmem_cache_create("task_struct",
+ arch_task_struct_size, ARCH_MIN_TASKALIGN,
+ SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif
/* do the arch specific task caches init */
@@ -414,7 +414,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
mm->total_vm = oldmm->total_vm;
- mm->shared_vm = oldmm->shared_vm;
+ mm->data_vm = oldmm->data_vm;
mm->exec_vm = oldmm->exec_vm;
mm->stack_vm = oldmm->stack_vm;
@@ -433,8 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
- vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
- -vma_pages(mpnt));
+ vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
continue;
}
charge = 0;
@@ -1848,16 +1847,19 @@ void __init proc_caches_init(void)
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
- SLAB_NOTRACK, sighand_ctor);
+ SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ NULL);
files_cachep = kmem_cache_create("files_cache",
sizeof(struct files_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ NULL);
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ NULL);
/*
* FIXME! The "sizeof(struct mm_struct)" currently includes the
* whole struct cpumask for the OFFSTACK case. We could change
@@ -1867,8 +1869,9 @@ void __init proc_caches_init(void)
*/
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
- vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ NULL);
+ vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
mmap_init();
nsproxy_cache_init();
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 8a310e240cda..5d6ce6413ef1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,8 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
- struct page *page, *page_head;
+ struct page *page;
+ struct address_space *mapping;
int err, ro = 0;
/*
@@ -519,46 +520,9 @@ again:
else
err = 0;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- page_head = page;
- if (unlikely(PageTail(page))) {
- put_page(page);
- /* serialize against __split_huge_page_splitting() */
- local_irq_disable();
- if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
- page_head = compound_head(page);
- /*
- * page_head is valid pointer but we must pin
- * it before taking the PG_lock and/or
- * PG_compound_lock. The moment we re-enable
- * irqs __split_huge_page_splitting() can
- * return and the head page can be freed from
- * under us. We can't take the PG_lock and/or
- * PG_compound_lock on a page that could be
- * freed from under us.
- */
- if (page != page_head) {
- get_page(page_head);
- put_page(page);
- }
- local_irq_enable();
- } else {
- local_irq_enable();
- goto again;
- }
- }
-#else
- page_head = compound_head(page);
- if (page != page_head) {
- get_page(page_head);
- put_page(page);
- }
-#endif
-
- lock_page(page_head);
-
+ lock_page(page);
/*
- * If page_head->mapping is NULL, then it cannot be a PageAnon
+ * If page->mapping is NULL, then it cannot be a PageAnon
* page; but it might be the ZERO_PAGE or in the gate area or
* in a special mapping (all cases which we are happy to fail);
* or it may have been a good file page when get_user_pages_fast
@@ -570,12 +534,13 @@ again:
*
* The case we do have to guard against is when memory pressure made
* shmem_writepage move it from filecache to swapcache beneath us:
- * an unlikely race, but we do need to retry for page_head->mapping.
+ * an unlikely race, but we do need to retry for page->mapping.
*/
- if (!page_head->mapping) {
- int shmem_swizzled = PageSwapCache(page_head);
- unlock_page(page_head);
- put_page(page_head);
+ mapping = compound_head(page)->mapping;
+ if (!mapping) {
+ int shmem_swizzled = PageSwapCache(page);
+ unlock_page(page);
+ put_page(page);
if (shmem_swizzled)
goto again;
return -EFAULT;
@@ -588,7 +553,7 @@ again:
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
*/
- if (PageAnon(page_head)) {
+ if (PageAnon(page)) {
/*
* A RO anonymous page will never change and thus doesn't make
* sense for futex operations.
@@ -603,15 +568,15 @@ again:
key->private.address = address;
} else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = page_head->mapping->host;
+ key->shared.inode = mapping->host;
key->shared.pgoff = basepage_index(page);
}
get_futex_key_refs(key); /* implies MB (B) */
out:
- unlock_page(page_head);
- put_page(page_head);
+ unlock_page(page);
+ put_page(page);
return err;
}
@@ -639,7 +604,7 @@ static int fault_in_user_writeable(u32 __user *uaddr)
down_read(&mm->mmap_sem);
ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
- FAULT_FLAG_WRITE);
+ FAULT_FLAG_WRITE, NULL);
up_read(&mm->mmap_sem);
return ret < 0 ? ret : 0;
@@ -1226,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (pi_state->owner != current)
return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
@@ -1252,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
else if (curval != uval)
ret = -EINVAL;
if (ret) {
- raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret;
}
- raw_spin_lock_irq(&pi_state->owner->pi_lock);
+ raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
- raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+ raw_spin_unlock(&pi_state->owner->pi_lock);
- raw_spin_lock_irq(&new_owner->pi_lock);
+ raw_spin_lock(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
- raw_spin_unlock_irq(&new_owner->pi_lock);
+ raw_spin_unlock(&new_owner->pi_lock);
- raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
@@ -2162,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* we returned due to timeout or signal without taking the
* rt_mutex. Too late.
*/
- raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
owner = rt_mutex_owner(&q->pi_state->pi_mutex);
if (!owner)
owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
- raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
ret = fixup_pi_state_owner(uaddr, q, owner);
goto out;
}
@@ -2919,7 +2884,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
}
ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;
head = p->robust_list;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 55c8c9349cfe..4ae3232e7a28 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -155,7 +155,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
}
ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;
head = p->compat_robust_list;
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 7080ae1eb6c1..2f9df37940a0 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -123,11 +123,6 @@ void gcov_enable_events(void)
}
#ifdef CONFIG_MODULES
-static inline int within(void *addr, void *start, unsigned long size)
-{
- return ((addr >= start) && (addr < start + size));
-}
-
/* Update list and generate events when modules are unloaded. */
static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
void *data)
@@ -142,7 +137,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
/* Remove entries located in module from linked list. */
while ((info = gcov_info_next(info))) {
- if (within(info, mod->module_core, mod->core_size)) {
+ if (within_module((unsigned long)info, mod)) {
gcov_info_unlink(prev, info);
if (gcov_events_enabled)
gcov_event(GCOV_REMOVE, info);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a302cf9a2126..57bff7857e87 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
unsigned int flags = 0, irq = desc->irq_data.irq;
struct irqaction *action = desc->action;
- do {
+ /* action might have become NULL since we dropped the lock */
+ while (action) {
irqreturn_t res;
trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
retval |= res;
action = action->next;
- } while (action);
+ }
add_interrupt_randomness(irq, flags);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8cf95de1ab3f..3e56d2f03e24 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
unsigned int type = IRQ_TYPE_NONE;
int virq;
- if (fwspec->fwnode)
- domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
- else
+ if (fwspec->fwnode) {
+ domain = irq_find_matching_fwnode(fwspec->fwnode,
+ DOMAIN_BUS_WIRED);
+ if (!domain)
+ domain = irq_find_matching_fwnode(fwspec->fwnode,
+ DOMAIN_BUS_ANY);
+ } else {
domain = irq_default_domain;
+ }
if (!domain) {
pr_warn("no irq domain found for %s !\n",
@@ -1061,6 +1066,7 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
__irq_set_handler(virq, handler, 0, handler_name);
irq_set_handler_data(virq, handler_data);
}
+EXPORT_SYMBOL(irq_domain_set_info);
/**
* irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 15b249e7c673..38e89ce7b071 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -109,9 +109,11 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
if (irq_find_mapping(domain, hwirq) > 0)
return -EEXIST;
- ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
- if (ret < 0)
- return ret;
+ if (domain->parent) {
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
+ }
for (i = 0; i < nr_irqs; i++) {
ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 0aa69ea1d8fd..3a47fa998fe0 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -122,8 +122,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
&task2->signal->cred_guard_mutex);
if (ret)
goto err;
- if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
- !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+ if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
+ !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) {
ret = -EPERM;
goto err_unlock;
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index d873b64fbddc..ee70aef5cd81 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -63,16 +63,16 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
if (ret)
goto out_free_image;
- ret = sanity_check_segment_list(image);
- if (ret)
- goto out_free_image;
-
- /* Enable the special crash kernel control page allocation policy. */
if (kexec_on_panic) {
+ /* Enable special crash kernel control page alloc policy. */
image->control_page = crashk_res.start;
image->type = KEXEC_TYPE_CRASH;
}
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_image;
+
/*
* Find a location for the control code buffer, and add it
* the vector of segments so that it's pages will also be
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c823f3001e12..8dc659144869 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -310,12 +310,9 @@ static void kimage_free_pages(struct page *page)
void kimage_free_page_list(struct list_head *list)
{
- struct list_head *pos, *next;
+ struct page *page, *next;
- list_for_each_safe(pos, next, list) {
- struct page *page;
-
- page = list_entry(pos, struct page, lru);
+ list_for_each_entry_safe(page, next, list, lru) {
list_del(&page->lru);
kimage_free_pages(page);
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b70ada0028d2..007b791f676d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -109,11 +109,13 @@ int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
return -EINVAL;
}
+#ifdef CONFIG_KEXEC_VERIFY_SIG
int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
unsigned long buf_len)
{
return -EKEYREJECTED;
}
+#endif
/* Apply relocations of type RELA */
int __weak
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index e4392a698ad4..0a52315d9c62 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,6 +15,27 @@ int kimage_is_destination_range(struct kimage *image,
extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ char *buffer;
+ unsigned long bufsz;
+ unsigned long mem;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ bool top_down; /* allocate from top of memory hole */
+};
+
void kimage_file_post_load_cleanup(struct kimage *image);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index db545cbcdb89..bc2c85c064c1 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/kallsyms.h>
#include <linux/livepatch.h>
+#include <asm/cacheflush.h>
/**
* struct klp_ops - structure for tracking registered ftrace ops structs
@@ -135,13 +136,8 @@ struct klp_find_arg {
const char *objname;
const char *name;
unsigned long addr;
- /*
- * If count == 0, the symbol was not found. If count == 1, a unique
- * match was found and addr is set. If count > 1, there is
- * unresolvable ambiguity among "count" number of symbols with the same
- * name in the same object.
- */
unsigned long count;
+ unsigned long pos;
};
static int klp_find_callback(void *data, const char *name,
@@ -158,37 +154,48 @@ static int klp_find_callback(void *data, const char *name,
if (args->objname && strcmp(args->objname, mod->name))
return 0;
- /*
- * args->addr might be overwritten if another match is found
- * but klp_find_object_symbol() handles this and only returns the
- * addr if count == 1.
- */
args->addr = addr;
args->count++;
+ /*
+ * Finish the search when the symbol is found for the desired position
+ * or the position is not defined for a non-unique symbol.
+ */
+ if ((args->pos && (args->count == args->pos)) ||
+ (!args->pos && (args->count > 1)))
+ return 1;
+
return 0;
}
static int klp_find_object_symbol(const char *objname, const char *name,
- unsigned long *addr)
+ unsigned long sympos, unsigned long *addr)
{
struct klp_find_arg args = {
.objname = objname,
.name = name,
.addr = 0,
- .count = 0
+ .count = 0,
+ .pos = sympos,
};
mutex_lock(&module_mutex);
kallsyms_on_each_symbol(klp_find_callback, &args);
mutex_unlock(&module_mutex);
- if (args.count == 0)
+ /*
+ * Ensure an address was found. If sympos is 0, ensure symbol is unique;
+ * otherwise ensure the symbol position count matches sympos.
+ */
+ if (args.addr == 0)
pr_err("symbol '%s' not found in symbol table\n", name);
- else if (args.count > 1)
+ else if (args.count > 1 && sympos == 0) {
pr_err("unresolvable ambiguity (%lu matches) on symbol '%s' in object '%s'\n",
args.count, name, objname);
- else {
+ } else if (sympos != args.count && sympos > 0) {
+ pr_err("symbol position %lu for symbol '%s' in object '%s' not found\n",
+ sympos, name, objname ? objname : "vmlinux");
+ } else {
*addr = args.addr;
return 0;
}
@@ -197,66 +204,6 @@ static int klp_find_object_symbol(const char *objname, const char *name,
return -EINVAL;
}
-struct klp_verify_args {
- const char *name;
- const unsigned long addr;
-};
-
-static int klp_verify_callback(void *data, const char *name,
- struct module *mod, unsigned long addr)
-{
- struct klp_verify_args *args = data;
-
- if (!mod &&
- !strcmp(args->name, name) &&
- args->addr == addr)
- return 1;
-
- return 0;
-}
-
-static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr)
-{
- struct klp_verify_args args = {
- .name = name,
- .addr = addr,
- };
- int ret;
-
- mutex_lock(&module_mutex);
- ret = kallsyms_on_each_symbol(klp_verify_callback, &args);
- mutex_unlock(&module_mutex);
-
- if (!ret) {
- pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n",
- name, addr);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int klp_find_verify_func_addr(struct klp_object *obj,
- struct klp_func *func)
-{
- int ret;
-
-#if defined(CONFIG_RANDOMIZE_BASE)
- /* If KASLR has been enabled, adjust old_addr accordingly */
- if (kaslr_enabled() && func->old_addr)
- func->old_addr += kaslr_offset();
-#endif
-
- if (!func->old_addr || klp_is_module(obj))
- ret = klp_find_object_symbol(obj->name, func->old_name,
- &func->old_addr);
- else
- ret = klp_verify_vmlinux_symbol(func->old_name,
- func->old_addr);
-
- return ret;
-}
-
/*
* external symbols are located outside the parent object (where the parent
* object is either vmlinux or the kmod being patched).
@@ -276,14 +223,18 @@ static int klp_find_external_symbol(struct module *pmod, const char *name,
}
preempt_enable();
- /* otherwise check if it's in another .o within the patch module */
- return klp_find_object_symbol(pmod->name, name, addr);
+ /*
+ * Check if it's in another .o within the patch module. This also
+ * checks that the external symbol is unique.
+ */
+ return klp_find_object_symbol(pmod->name, name, 0, addr);
}
static int klp_write_object_relocations(struct module *pmod,
struct klp_object *obj)
{
- int ret;
+ int ret = 0;
+ unsigned long val;
struct klp_reloc *reloc;
if (WARN_ON(!klp_is_object_loaded(obj)))
@@ -292,41 +243,38 @@ static int klp_write_object_relocations(struct module *pmod,
if (WARN_ON(!obj->relocs))
return -EINVAL;
+ module_disable_ro(pmod);
+
for (reloc = obj->relocs; reloc->name; reloc++) {
- if (!klp_is_module(obj)) {
-
-#if defined(CONFIG_RANDOMIZE_BASE)
- /* If KASLR has been enabled, adjust old value accordingly */
- if (kaslr_enabled())
- reloc->val += kaslr_offset();
-#endif
- ret = klp_verify_vmlinux_symbol(reloc->name,
- reloc->val);
- if (ret)
- return ret;
- } else {
- /* module, reloc->val needs to be discovered */
- if (reloc->external)
- ret = klp_find_external_symbol(pmod,
- reloc->name,
- &reloc->val);
- else
- ret = klp_find_object_symbol(obj->mod->name,
- reloc->name,
- &reloc->val);
- if (ret)
- return ret;
- }
+ /* discover the address of the referenced symbol */
+ if (reloc->external) {
+ if (reloc->sympos > 0) {
+ pr_err("non-zero sympos for external reloc symbol '%s' is not supported\n",
+ reloc->name);
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = klp_find_external_symbol(pmod, reloc->name, &val);
+ } else
+ ret = klp_find_object_symbol(obj->name,
+ reloc->name,
+ reloc->sympos,
+ &val);
+ if (ret)
+ goto out;
+
ret = klp_write_module_reloc(pmod, reloc->type, reloc->loc,
- reloc->val + reloc->addend);
+ val + reloc->addend);
if (ret) {
pr_err("relocation failed for symbol '%s' at 0x%016lx (%d)\n",
- reloc->name, reloc->val, ret);
- return ret;
+ reloc->name, val, ret);
+ goto out;
}
}
- return 0;
+out:
+ module_enable_ro(pmod);
+ return ret;
}
static void notrace klp_ftrace_handler(unsigned long ip,
@@ -593,7 +541,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch);
* /sys/kernel/livepatch/<patch>
* /sys/kernel/livepatch/<patch>/enabled
* /sys/kernel/livepatch/<patch>/<object>
- * /sys/kernel/livepatch/<patch>/<object>/<func>
+ * /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
*/
static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -738,8 +686,14 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
INIT_LIST_HEAD(&func->stack_node);
func->state = KLP_DISABLED;
+ /* The format for the sysfs directory is <function,sympos> where sympos
+ * is the nth occurrence of this symbol in kallsyms for the patched
+ * object. If the user selects 0 for old_sympos, then 1 will be used
+ * since a unique symbol will be the first occurrence.
+ */
return kobject_init_and_add(&func->kobj, &klp_ktype_func,
- &obj->kobj, "%s", func->old_name);
+ &obj->kobj, "%s,%lu", func->old_name,
+ func->old_sympos ? func->old_sympos : 1);
}
/* parts of the initialization that is done only when the object is loaded */
@@ -756,7 +710,9 @@ static int klp_init_object_loaded(struct klp_patch *patch,
}
klp_for_each_func(obj, func) {
- ret = klp_find_verify_func_addr(obj, func);
+ ret = klp_find_object_symbol(obj->name, func->old_name,
+ func->old_sympos,
+ &func->old_addr);
if (ret)
return ret;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75dd9c0..3e746607abe5 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
* 2) Drop lock->wait_lock
* 3) Try to unlock the lock with cmpxchg
*/
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+ unsigned long flags)
__releases(lock->wait_lock)
{
struct task_struct *owner = rt_mutex_owner(lock);
clear_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/*
* If a new waiter comes in between the unlock and the cmpxchg
* we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
/*
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
*/
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+ unsigned long flags)
__releases(lock->wait_lock)
{
lock->owner = NULL;
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return true;
}
#endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
int ret = 0, depth = 0;
struct rt_mutex *lock;
bool detect_deadlock;
- unsigned long flags;
bool requeue = true;
detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* [1] Task cannot go away as we did a get_task() before !
*/
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irq(&task->pi_lock);
/*
* [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* operations.
*/
if (!raw_spin_trylock(&lock->wait_lock)) {
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irq(&task->pi_lock);
cpu_relax();
goto retry;
}
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* No requeue[7] here. Just release @task [8]
*/
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
put_task_struct(task);
/*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* If there is no owner of the lock, end of chain.
*/
if (!rt_mutex_owner(lock)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
/*
* No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop locks */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&task->pi_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
/* If owner is not blocked, end of chain. */
if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
put_task_struct(task);
/*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
wake_up_process(rt_mutex_top_waiter(lock)->task);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. the owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
/* [11] requeue the pi waiters if necessary */
if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop the locks */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&task->pi_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
/*
* Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
goto again;
out_unlock_pi:
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
put_task_struct(task);
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* Try to take an rt-mutex
*
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
*
* @lock: The lock to be acquired.
* @task: The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
- unsigned long flags;
-
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* case, but conditionals are more expensive than a redundant
* store.
*/
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
task->pi_blocked_on = NULL;
/*
* Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
takeit:
/* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
*
* Prepare waiter and propagate pi chain
*
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *top_waiter = waiter;
struct rt_mutex *next_lock;
int chain_walk = 0, res;
- unsigned long flags;
/*
* Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
if (owner == task)
return -EDEADLK;
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
task->pi_blocked_on = waiter;
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
if (!owner)
return 0;
- raw_spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock(&owner->pi_lock);
if (waiter == rt_mutex_top_waiter(lock)) {
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock(&owner->pi_lock);
/*
* Even if full deadlock detection is on, if the owner is not
* blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
*/
get_task_struct(owner);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
next_lock, waiter, task);
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
return res;
}
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
* Remove the top waiter from the current tasks pi waiter tree and
* queue it up.
*
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
*/
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
- unsigned long flags;
- raw_spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock(&current->pi_lock);
waiter = rt_mutex_top_waiter(lock);
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
- raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock(&current->pi_lock);
wake_q_add(wake_q, waiter->task);
}
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
/*
* Remove a waiter from a lock and give up
*
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
* have just failed to try_to_take_rt_mutex().
*/
static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock;
- unsigned long flags;
- raw_spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock(&current->pi_lock);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
- raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock(&current->pi_lock);
/*
* Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (!owner || !is_top_waiter)
return;
- raw_spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock(&owner->pi_lock);
rt_mutex_dequeue_pi(owner, waiter);
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock(&owner->pi_lock);
/*
* Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(owner);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
next_lock, NULL, current);
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
}
/*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
* @state: the state the task should block in (TASK_INTERRUPTIBLE
- * or TASK_UNINTERRUPTIBLE)
+ * or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
*
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
schedule();
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
}
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk)
{
struct rt_mutex_waiter waiter;
+ unsigned long flags;
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
- raw_spin_lock(&lock->wait_lock);
+ /*
+ * Technically we could use raw_spin_[un]lock_irq() here, but this can
+ * be called in early boot if the cmpxchg() fast path is disabled
+ * (debug, no architecture support). In this case we will acquire the
+ * rtmutex with lock->wait_lock held. But we cannot unconditionally
+ * enable interrupts in that early boot case. So we need to use the
+ * irqsave/restore variants.
+ */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return 0;
}
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/* Remove pending timer: */
if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
+ unsigned long flags;
int ret;
/*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
return 0;
/*
- * The mutex has currently no owner. Lock the wait lock and
- * try to acquire the lock.
+ * The mutex has currently no owner. Lock the wait lock and try to
+ * acquire the lock. We use irqsave here to support early boot calls.
*/
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
ret = try_to_take_rt_mutex(lock, current, NULL);
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return ret;
}
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
{
- raw_spin_lock(&lock->wait_lock);
+ unsigned long flags;
+
+ /* irqsave required to support early boot calls */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
debug_rt_mutex_unlock(lock);
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
*/
while (!rt_mutex_has_waiters(lock)) {
/* Drops lock->wait_lock ! */
- if (unlock_rt_mutex_safe(lock) == true)
+ if (unlock_rt_mutex_safe(lock, flags) == true)
return false;
/* Relock the rtmutex and try again */
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
/*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
*/
mark_wakeup_next_waiter(wake_q, lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/* check PI boosting */
return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
{
int ret;
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
if (try_to_take_rt_mutex(lock, task, NULL)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 1;
}
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (unlikely(ret))
remove_waiter(lock, waiter);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
{
int ret;
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return ret;
}
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 7658d32c5c78..e517a16cb426 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -10,8 +10,11 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
+#include <linux/radix-tree.h>
+#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/types.h>
+#include <linux/pfn_t.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
@@ -147,24 +150,127 @@ void devm_memunmap(struct device *dev, void *addr)
}
EXPORT_SYMBOL(devm_memunmap);
+pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+{
+ return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
+EXPORT_SYMBOL(phys_to_pfn_t);
+
#ifdef CONFIG_ZONE_DEVICE
+static DEFINE_MUTEX(pgmap_lock);
+static RADIX_TREE(pgmap_radix, GFP_KERNEL);
+#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
struct page_map {
struct resource res;
+ struct percpu_ref *ref;
+ struct dev_pagemap pgmap;
+ struct vmem_altmap altmap;
};
-static void devm_memremap_pages_release(struct device *dev, void *res)
+void get_zone_device_page(struct page *page)
+{
+ percpu_ref_get(page->pgmap->ref);
+}
+EXPORT_SYMBOL(get_zone_device_page);
+
+void put_zone_device_page(struct page *page)
+{
+ put_dev_pagemap(page->pgmap);
+}
+EXPORT_SYMBOL(put_zone_device_page);
+
+static void pgmap_radix_release(struct resource *res)
+{
+ resource_size_t key;
+
+ mutex_lock(&pgmap_lock);
+ for (key = res->start; key <= res->end; key += SECTION_SIZE)
+ radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT);
+ mutex_unlock(&pgmap_lock);
+}
+
+static unsigned long pfn_first(struct page_map *page_map)
+{
+ struct dev_pagemap *pgmap = &page_map->pgmap;
+ const struct resource *res = &page_map->res;
+ struct vmem_altmap *altmap = pgmap->altmap;
+ unsigned long pfn;
+
+ pfn = res->start >> PAGE_SHIFT;
+ if (altmap)
+ pfn += vmem_altmap_offset(altmap);
+ return pfn;
+}
+
+static unsigned long pfn_end(struct page_map *page_map)
+{
+ const struct resource *res = &page_map->res;
+
+ return (res->start + resource_size(res)) >> PAGE_SHIFT;
+}
+
+#define for_each_device_pfn(pfn, map) \
+ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+
+static void devm_memremap_pages_release(struct device *dev, void *data)
{
- struct page_map *page_map = res;
+ struct page_map *page_map = data;
+ struct resource *res = &page_map->res;
+ resource_size_t align_start, align_size;
+ struct dev_pagemap *pgmap = &page_map->pgmap;
+
+ if (percpu_ref_tryget_live(pgmap->ref)) {
+ dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
+ percpu_ref_put(pgmap->ref);
+ }
+
+ pgmap_radix_release(res);
/* pages are dead and unused, undo the arch mapping */
- arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
+ align_start = res->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(resource_size(res), SECTION_SIZE);
+ arch_remove_memory(align_start, align_size);
+ dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+ "%s: failed to free all reserved pages\n", __func__);
+}
+
+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+{
+ struct page_map *page_map;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT);
+ return page_map ? &page_map->pgmap : NULL;
}
-void *devm_memremap_pages(struct device *dev, struct resource *res)
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @res: "host memory" address range
+ * @ref: a live per-cpu reference count
+ * @altmap: optional descriptor for allocating the memmap from @res
+ *
+ * Notes:
+ * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
+ * (or devm release event).
+ *
+ * 2/ @res is expected to be a host memory range that could feasibly be
+ * treated as a "System RAM" range, i.e. not a device mmio range, but
+ * this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct resource *res,
+ struct percpu_ref *ref, struct vmem_altmap *altmap)
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
+ resource_size_t key, align_start, align_size;
+ struct dev_pagemap *pgmap;
struct page_map *page_map;
+ unsigned long pfn;
int error, nid;
if (is_ram == REGION_MIXED) {
@@ -176,25 +282,120 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
if (is_ram == REGION_INTERSECTS)
return __va(res->start);
+ if (altmap && !IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) {
+ dev_err(dev, "%s: altmap requires CONFIG_SPARSEMEM_VMEMMAP=y\n",
+ __func__);
+ return ERR_PTR(-ENXIO);
+ }
+
+ if (!ref)
+ return ERR_PTR(-EINVAL);
+
page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
return ERR_PTR(-ENOMEM);
+ pgmap = &page_map->pgmap;
memcpy(&page_map->res, res, sizeof(*res));
+ pgmap->dev = dev;
+ if (altmap) {
+ memcpy(&page_map->altmap, altmap, sizeof(*altmap));
+ pgmap->altmap = &page_map->altmap;
+ }
+ pgmap->ref = ref;
+ pgmap->res = &page_map->res;
+
+ mutex_lock(&pgmap_lock);
+ error = 0;
+ for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+ struct dev_pagemap *dup;
+
+ rcu_read_lock();
+ dup = find_dev_pagemap(key);
+ rcu_read_unlock();
+ if (dup) {
+ dev_err(dev, "%s: %pr collides with mapping for %s\n",
+ __func__, res, dev_name(dup->dev));
+ error = -EBUSY;
+ break;
+ }
+ error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
+ page_map);
+ if (error) {
+ dev_err(dev, "%s: failed: %d\n", __func__, error);
+ break;
+ }
+ }
+ mutex_unlock(&pgmap_lock);
+ if (error)
+ goto err_radix;
+
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();
- error = arch_add_memory(nid, res->start, resource_size(res), true);
- if (error) {
- devres_free(page_map);
- return ERR_PTR(error);
- }
+ align_start = res->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(resource_size(res), SECTION_SIZE);
+ error = arch_add_memory(nid, align_start, align_size, true);
+ if (error)
+ goto err_add_memory;
+ for_each_device_pfn(pfn, page_map) {
+ struct page *page = pfn_to_page(pfn);
+
+ /* ZONE_DEVICE pages must never appear on a slab lru */
+ list_force_poison(&page->lru);
+ page->pgmap = pgmap;
+ }
devres_add(dev, page_map);
return __va(res->start);
+
+ err_add_memory:
+ err_radix:
+ pgmap_radix_release(res);
+ devres_free(page_map);
+ return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+ /* number of pfns from base where pfn_to_page() is valid */
+ return altmap->reserve + altmap->free;
+}
+
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
+{
+ altmap->alloc -= nr_pfns;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+{
+ /*
+ * 'memmap_start' is the virtual address for the first "struct
+ * page" in this range of the vmemmap array. In the case of
+ * CONFIG_SPARSE_VMEMMAP a page_to_pfn conversion is simple
+ * pointer arithmetic, so we can perform this to_vmem_altmap()
+ * conversion without concern for the initialization state of
+ * the struct page fields.
+ */
+ struct page *page = (struct page *) memmap_start;
+ struct dev_pagemap *pgmap;
+
+ /*
+ * Uncoditionally retrieve a dev_pagemap associated with the
+ * given physical address, this is only for use in the
+ * arch_{add|remove}_memory() for setting up and tearing down
+ * the memmap.
+ */
+ rcu_read_lock();
+ pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+ rcu_read_unlock();
+
+ return pgmap ? pgmap->altmap : NULL;
+}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#endif /* CONFIG_ZONE_DEVICE */
diff --git a/kernel/module.c b/kernel/module.c
index 38c7bd5583ff..8358f4697c0c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -80,15 +80,6 @@
# define debug_align(X) (X)
#endif
-/*
- * Given BASE and SIZE this macro calculates the number of pages the
- * memory regions occupies
- */
-#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \
- (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \
- PFN_DOWN((unsigned long)BASE) + 1) \
- : (0UL))
-
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
@@ -108,13 +99,6 @@ static LIST_HEAD(modules);
* Use a latched RB-tree for __module_address(); this allows us to use
* RCU-sched lookups of the address from any context.
*
- * Because modules have two address ranges: init and core, we need two
- * latch_tree_nodes entries. Therefore we need the back-pointer from
- * mod_tree_node.
- *
- * Because init ranges are short lived we mark them unlikely and have placed
- * them outside the critical cacheline in struct module.
- *
* This is conditional on PERF_EVENTS || TRACING because those can really hit
* __module_address() hard by doing a lot of stack unwinding; potentially from
* NMI context.
@@ -122,24 +106,16 @@ static LIST_HEAD(modules);
static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
- struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
- struct module *mod = mtn->mod;
+ struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
- if (unlikely(mtn == &mod->mtn_init))
- return (unsigned long)mod->module_init;
-
- return (unsigned long)mod->module_core;
+ return (unsigned long)layout->base;
}
static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
{
- struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
- struct module *mod = mtn->mod;
-
- if (unlikely(mtn == &mod->mtn_init))
- return (unsigned long)mod->init_size;
+ struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
- return (unsigned long)mod->core_size;
+ return (unsigned long)layout->size;
}
static __always_inline bool
@@ -197,23 +173,23 @@ static void __mod_tree_remove(struct mod_tree_node *node)
*/
static void mod_tree_insert(struct module *mod)
{
- mod->mtn_core.mod = mod;
- mod->mtn_init.mod = mod;
+ mod->core_layout.mtn.mod = mod;
+ mod->init_layout.mtn.mod = mod;
- __mod_tree_insert(&mod->mtn_core);
- if (mod->init_size)
- __mod_tree_insert(&mod->mtn_init);
+ __mod_tree_insert(&mod->core_layout.mtn);
+ if (mod->init_layout.size)
+ __mod_tree_insert(&mod->init_layout.mtn);
}
static void mod_tree_remove_init(struct module *mod)
{
- if (mod->init_size)
- __mod_tree_remove(&mod->mtn_init);
+ if (mod->init_layout.size)
+ __mod_tree_remove(&mod->init_layout.mtn);
}
static void mod_tree_remove(struct module *mod)
{
- __mod_tree_remove(&mod->mtn_core);
+ __mod_tree_remove(&mod->core_layout.mtn);
mod_tree_remove_init(mod);
}
@@ -267,9 +243,9 @@ static void __mod_update_bounds(void *base, unsigned int size)
static void mod_update_bounds(struct module *mod)
{
- __mod_update_bounds(mod->module_core, mod->core_size);
- if (mod->init_size)
- __mod_update_bounds(mod->module_init, mod->init_size);
+ __mod_update_bounds(mod->core_layout.base, mod->core_layout.size);
+ if (mod->init_layout.size)
+ __mod_update_bounds(mod->init_layout.base, mod->init_layout.size);
}
#ifdef CONFIG_KGDB_KDB
@@ -1214,7 +1190,7 @@ struct module_attribute module_uevent =
static ssize_t show_coresize(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
- return sprintf(buffer, "%u\n", mk->mod->core_size);
+ return sprintf(buffer, "%u\n", mk->mod->core_layout.size);
}
static struct module_attribute modinfo_coresize =
@@ -1223,7 +1199,7 @@ static struct module_attribute modinfo_coresize =
static ssize_t show_initsize(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
- return sprintf(buffer, "%u\n", mk->mod->init_size);
+ return sprintf(buffer, "%u\n", mk->mod->init_layout.size);
}
static struct module_attribute modinfo_initsize =
@@ -1873,64 +1849,75 @@ static void mod_sysfs_teardown(struct module *mod)
/*
* LKM RO/NX protection: protect module's text/ro-data
* from modification and any data from execution.
+ *
+ * General layout of module is:
+ * [text] [read-only-data] [writable data]
+ * text_size -----^ ^ ^
+ * ro_size ------------------------| |
+ * size -------------------------------------------|
+ *
+ * These values are always page-aligned (as is base)
*/
-void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
+static void frob_text(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
{
- unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
- unsigned long end_pfn = PFN_DOWN((unsigned long)end);
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base,
+ layout->text_size >> PAGE_SHIFT);
+}
- if (end_pfn > begin_pfn)
- set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+static void frob_rodata(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
+{
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base + layout->text_size,
+ (layout->ro_size - layout->text_size) >> PAGE_SHIFT);
}
-static void set_section_ro_nx(void *base,
- unsigned long text_size,
- unsigned long ro_size,
- unsigned long total_size)
+static void frob_writable_data(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
{
- /* begin and end PFNs of the current subsection */
- unsigned long begin_pfn;
- unsigned long end_pfn;
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base + layout->ro_size,
+ (layout->size - layout->ro_size) >> PAGE_SHIFT);
+}
- /*
- * Set RO for module text and RO-data:
- * - Always protect first page.
- * - Do not protect last partial page.
- */
- if (ro_size > 0)
- set_page_attributes(base, base + ro_size, set_memory_ro);
+/* livepatching wants to disable read-only so it can frob module. */
+void module_disable_ro(const struct module *mod)
+{
+ frob_text(&mod->core_layout, set_memory_rw);
+ frob_rodata(&mod->core_layout, set_memory_rw);
+ frob_text(&mod->init_layout, set_memory_rw);
+ frob_rodata(&mod->init_layout, set_memory_rw);
+}
- /*
- * Set NX permissions for module data:
- * - Do not protect first partial page.
- * - Always protect last page.
- */
- if (total_size > text_size) {
- begin_pfn = PFN_UP((unsigned long)base + text_size);
- end_pfn = PFN_UP((unsigned long)base + total_size);
- if (end_pfn > begin_pfn)
- set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
- }
+void module_enable_ro(const struct module *mod)
+{
+ frob_text(&mod->core_layout, set_memory_ro);
+ frob_rodata(&mod->core_layout, set_memory_ro);
+ frob_text(&mod->init_layout, set_memory_ro);
+ frob_rodata(&mod->init_layout, set_memory_ro);
}
-static void unset_module_core_ro_nx(struct module *mod)
+static void module_enable_nx(const struct module *mod)
{
- set_page_attributes(mod->module_core + mod->core_text_size,
- mod->module_core + mod->core_size,
- set_memory_x);
- set_page_attributes(mod->module_core,
- mod->module_core + mod->core_ro_size,
- set_memory_rw);
+ frob_rodata(&mod->core_layout, set_memory_nx);
+ frob_writable_data(&mod->core_layout, set_memory_nx);
+ frob_rodata(&mod->init_layout, set_memory_nx);
+ frob_writable_data(&mod->init_layout, set_memory_nx);
}
-static void unset_module_init_ro_nx(struct module *mod)
+static void module_disable_nx(const struct module *mod)
{
- set_page_attributes(mod->module_init + mod->init_text_size,
- mod->module_init + mod->init_size,
- set_memory_x);
- set_page_attributes(mod->module_init,
- mod->module_init + mod->init_ro_size,
- set_memory_rw);
+ frob_rodata(&mod->core_layout, set_memory_x);
+ frob_writable_data(&mod->core_layout, set_memory_x);
+ frob_rodata(&mod->init_layout, set_memory_x);
+ frob_writable_data(&mod->init_layout, set_memory_x);
}
/* Iterate through all modules and set each module's text as RW */
@@ -1942,16 +1929,9 @@ void set_all_modules_text_rw(void)
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
- if ((mod->module_core) && (mod->core_text_size)) {
- set_page_attributes(mod->module_core,
- mod->module_core + mod->core_text_size,
- set_memory_rw);
- }
- if ((mod->module_init) && (mod->init_text_size)) {
- set_page_attributes(mod->module_init,
- mod->module_init + mod->init_text_size,
- set_memory_rw);
- }
+
+ frob_text(&mod->core_layout, set_memory_rw);
+ frob_text(&mod->init_layout, set_memory_rw);
}
mutex_unlock(&module_mutex);
}
@@ -1965,23 +1945,25 @@ void set_all_modules_text_ro(void)
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
- if ((mod->module_core) && (mod->core_text_size)) {
- set_page_attributes(mod->module_core,
- mod->module_core + mod->core_text_size,
- set_memory_ro);
- }
- if ((mod->module_init) && (mod->init_text_size)) {
- set_page_attributes(mod->module_init,
- mod->module_init + mod->init_text_size,
- set_memory_ro);
- }
+
+ frob_text(&mod->core_layout, set_memory_ro);
+ frob_text(&mod->init_layout, set_memory_ro);
}
mutex_unlock(&module_mutex);
}
+
+static void disable_ro_nx(const struct module_layout *layout)
+{
+ frob_text(layout, set_memory_rw);
+ frob_rodata(layout, set_memory_rw);
+ frob_rodata(layout, set_memory_x);
+ frob_writable_data(layout, set_memory_x);
+}
+
#else
-static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
-static void unset_module_core_ro_nx(struct module *mod) { }
-static void unset_module_init_ro_nx(struct module *mod) { }
+static void disable_ro_nx(const struct module_layout *layout) { }
+static void module_enable_nx(const struct module *mod) { }
+static void module_disable_nx(const struct module *mod) { }
#endif
void __weak module_memfree(void *module_region)
@@ -2033,19 +2015,19 @@ static void free_module(struct module *mod)
synchronize_sched();
mutex_unlock(&module_mutex);
- /* This may be NULL, but that's OK */
- unset_module_init_ro_nx(mod);
+ /* This may be empty, but that's OK */
+ disable_ro_nx(&mod->init_layout);
module_arch_freeing_init(mod);
- module_memfree(mod->module_init);
+ module_memfree(mod->init_layout.base);
kfree(mod->args);
percpu_modfree(mod);
/* Free lock-classes; relies on the preceding sync_rcu(). */
- lockdep_free_key_range(mod->module_core, mod->core_size);
+ lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
/* Finally, free the core (containing the module structure) */
- unset_module_core_ro_nx(mod);
- module_memfree(mod->module_core);
+ disable_ro_nx(&mod->core_layout);
+ module_memfree(mod->core_layout.base);
#ifdef CONFIG_MPU
update_protections(current->mm);
@@ -2248,20 +2230,20 @@ static void layout_sections(struct module *mod, struct load_info *info)
|| s->sh_entsize != ~0UL
|| strstarts(sname, ".init"))
continue;
- s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
+ s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
- mod->core_size = debug_align(mod->core_size);
- mod->core_text_size = mod->core_size;
+ mod->core_layout.size = debug_align(mod->core_layout.size);
+ mod->core_layout.text_size = mod->core_layout.size;
break;
case 1: /* RO: text and ro-data */
- mod->core_size = debug_align(mod->core_size);
- mod->core_ro_size = mod->core_size;
+ mod->core_layout.size = debug_align(mod->core_layout.size);
+ mod->core_layout.ro_size = mod->core_layout.size;
break;
case 3: /* whole core */
- mod->core_size = debug_align(mod->core_size);
+ mod->core_layout.size = debug_align(mod->core_layout.size);
break;
}
}
@@ -2277,21 +2259,21 @@ static void layout_sections(struct module *mod, struct load_info *info)
|| s->sh_entsize != ~0UL
|| !strstarts(sname, ".init"))
continue;
- s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
+ s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
| INIT_OFFSET_MASK);
pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
- mod->init_size = debug_align(mod->init_size);
- mod->init_text_size = mod->init_size;
+ mod->init_layout.size = debug_align(mod->init_layout.size);
+ mod->init_layout.text_size = mod->init_layout.size;
break;
case 1: /* RO: text and ro-data */
- mod->init_size = debug_align(mod->init_size);
- mod->init_ro_size = mod->init_size;
+ mod->init_layout.size = debug_align(mod->init_layout.size);
+ mod->init_layout.ro_size = mod->init_layout.size;
break;
case 3: /* whole init */
- mod->init_size = debug_align(mod->init_size);
+ mod->init_layout.size = debug_align(mod->init_layout.size);
break;
}
}
@@ -2401,7 +2383,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
}
if (sym->st_shndx == SHN_UNDEF)
return 'U';
- if (sym->st_shndx == SHN_ABS)
+ if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu)
return 'a';
if (sym->st_shndx >= SHN_LORESERVE)
return '?';
@@ -2430,7 +2412,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
}
static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
- unsigned int shnum)
+ unsigned int shnum, unsigned int pcpundx)
{
const Elf_Shdr *sec;
@@ -2439,6 +2421,11 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
|| !src->st_name)
return false;
+#ifdef CONFIG_KALLSYMS_ALL
+ if (src->st_shndx == pcpundx)
+ return true;
+#endif
+
sec = sechdrs + src->st_shndx;
if (!(sec->sh_flags & SHF_ALLOC)
#ifndef CONFIG_KALLSYMS_ALL
@@ -2466,7 +2453,7 @@ static void layout_symtab(struct module *mod, struct load_info *info)
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
- symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+ symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
@@ -2476,23 +2463,24 @@ static void layout_symtab(struct module *mod, struct load_info *info)
/* Compute total space required for the core symbols' strtab. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
- is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
+ info->index.pcpu)) {
strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
}
/* Append room for core symbols at end of core part. */
- info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
- info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
- mod->core_size += strtab_size;
- mod->core_size = debug_align(mod->core_size);
+ info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1);
+ info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod->core_layout.size += strtab_size;
+ mod->core_layout.size = debug_align(mod->core_layout.size);
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
- strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+ strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
info->index.str) | INIT_OFFSET_MASK;
- mod->init_size = debug_align(mod->init_size);
+ mod->init_layout.size = debug_align(mod->init_layout.size);
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}
@@ -2513,12 +2501,13 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
for (i = 0; i < mod->num_symtab; i++)
mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
- mod->core_symtab = dst = mod->module_core + info->symoffs;
- mod->core_strtab = s = mod->module_core + info->stroffs;
+ mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
+ mod->core_strtab = s = mod->core_layout.base + info->stroffs;
src = mod->symtab;
for (ndst = i = 0; i < mod->num_symtab; i++) {
if (i == 0 ||
- is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
+ info->index.pcpu)) {
dst[ndst] = src[i];
dst[ndst++].st_name = s - mod->core_strtab;
s += strlcpy(s, &mod->strtab[src[i].st_name],
@@ -2964,7 +2953,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc(mod->core_size);
+ ptr = module_alloc(mod->core_layout.size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2974,11 +2963,11 @@ static int move_module(struct module *mod, struct load_info *info)
if (!ptr)
return -ENOMEM;
- memset(ptr, 0, mod->core_size);
- mod->module_core = ptr;
+ memset(ptr, 0, mod->core_layout.size);
+ mod->core_layout.base = ptr;
- if (mod->init_size) {
- ptr = module_alloc(mod->init_size);
+ if (mod->init_layout.size) {
+ ptr = module_alloc(mod->init_layout.size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
@@ -2987,13 +2976,13 @@ static int move_module(struct module *mod, struct load_info *info)
*/
kmemleak_ignore(ptr);
if (!ptr) {
- module_memfree(mod->module_core);
+ module_memfree(mod->core_layout.base);
return -ENOMEM;
}
- memset(ptr, 0, mod->init_size);
- mod->module_init = ptr;
+ memset(ptr, 0, mod->init_layout.size);
+ mod->init_layout.base = ptr;
} else
- mod->module_init = NULL;
+ mod->init_layout.base = NULL;
/* Transfer each section which specifies SHF_ALLOC */
pr_debug("final section addresses:\n");
@@ -3005,10 +2994,10 @@ static int move_module(struct module *mod, struct load_info *info)
continue;
if (shdr->sh_entsize & INIT_OFFSET_MASK)
- dest = mod->module_init
+ dest = mod->init_layout.base
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
- dest = mod->module_core + shdr->sh_entsize;
+ dest = mod->core_layout.base + shdr->sh_entsize;
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
@@ -3070,12 +3059,12 @@ static void flush_module_icache(const struct module *mod)
* Do it before processing of module parameters, so the module
* can provide parameter accessor functions of its own.
*/
- if (mod->module_init)
- flush_icache_range((unsigned long)mod->module_init,
- (unsigned long)mod->module_init
- + mod->init_size);
- flush_icache_range((unsigned long)mod->module_core,
- (unsigned long)mod->module_core + mod->core_size);
+ if (mod->init_layout.base)
+ flush_icache_range((unsigned long)mod->init_layout.base,
+ (unsigned long)mod->init_layout.base
+ + mod->init_layout.size);
+ flush_icache_range((unsigned long)mod->core_layout.base,
+ (unsigned long)mod->core_layout.base + mod->core_layout.size);
set_fs(old_fs);
}
@@ -3133,8 +3122,8 @@ static void module_deallocate(struct module *mod, struct load_info *info)
{
percpu_modfree(mod);
module_arch_freeing_init(mod);
- module_memfree(mod->module_init);
- module_memfree(mod->module_core);
+ module_memfree(mod->init_layout.base);
+ module_memfree(mod->core_layout.base);
}
int __weak module_finalize(const Elf_Ehdr *hdr,
@@ -3221,7 +3210,7 @@ static noinline int do_init_module(struct module *mod)
ret = -ENOMEM;
goto fail;
}
- freeinit->module_init = mod->module_init;
+ freeinit->module_init = mod->init_layout.base;
/*
* We want to find out whether @mod uses async during init. Clear
@@ -3279,12 +3268,12 @@ static noinline int do_init_module(struct module *mod)
mod->strtab = mod->core_strtab;
#endif
mod_tree_remove_init(mod);
- unset_module_init_ro_nx(mod);
+ disable_ro_nx(&mod->init_layout);
module_arch_freeing_init(mod);
- mod->module_init = NULL;
- mod->init_size = 0;
- mod->init_ro_size = 0;
- mod->init_text_size = 0;
+ mod->init_layout.base = NULL;
+ mod->init_layout.size = 0;
+ mod->init_layout.ro_size = 0;
+ mod->init_layout.text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
* walking this with preempt disabled. In all the failure paths, we
@@ -3373,17 +3362,9 @@ static int complete_formation(struct module *mod, struct load_info *info)
/* This relies on module_mutex for list integrity. */
module_bug_finalize(info->hdr, info->sechdrs, mod);
- /* Set RO and NX regions for core */
- set_section_ro_nx(mod->module_core,
- mod->core_text_size,
- mod->core_ro_size,
- mod->core_size);
-
- /* Set RO and NX regions for init */
- set_section_ro_nx(mod->module_init,
- mod->init_text_size,
- mod->init_ro_size,
- mod->init_size);
+ /* Set RO and NX regions */
+ module_enable_ro(mod);
+ module_enable_nx(mod);
/* Mark state as coming so strong_try_module_get() ignores us,
* but kallsyms etc. can see us. */
@@ -3548,8 +3529,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
MODULE_STATE_GOING, mod);
/* we can't deallocate the module until we clear memory protection */
- unset_module_init_ro_nx(mod);
- unset_module_core_ro_nx(mod);
+ module_disable_ro(mod);
+ module_disable_nx(mod);
ddebug_cleanup:
dynamic_debug_remove(info->debug);
@@ -3578,7 +3559,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
*/
ftrace_release_mod(mod);
/* Free lock-classes; relies on the preceding sync_rcu() */
- lockdep_free_key_range(mod->module_core, mod->core_size);
+ lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
module_deallocate(mod, info);
free_copy:
@@ -3656,9 +3637,9 @@ static const char *get_ksymbol(struct module *mod,
/* At worse, next value is at end of module */
if (within_module_init(addr, mod))
- nextval = (unsigned long)mod->module_init+mod->init_text_size;
+ nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size;
else
- nextval = (unsigned long)mod->module_core+mod->core_text_size;
+ nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size;
/* Scan for closest preceding symbol, and next symbol. (ELF
starts real symbols at 1). */
@@ -3905,7 +3886,7 @@ static int m_show(struct seq_file *m, void *p)
return 0;
seq_printf(m, "%s %u",
- mod->name, mod->init_size + mod->core_size);
+ mod->name, mod->init_layout.size + mod->core_layout.size);
print_unload_info(m, mod);
/* Informative for users. */
@@ -3914,7 +3895,7 @@ static int m_show(struct seq_file *m, void *p)
mod->state == MODULE_STATE_COMING ? "Loading" :
"Live");
/* Used by oprofile and other similar tools. */
- seq_printf(m, " 0x%pK", mod->module_core);
+ seq_printf(m, " 0x%pK", mod->core_layout.base);
/* Taints info */
if (mod->taints)
@@ -4057,8 +4038,8 @@ struct module *__module_text_address(unsigned long addr)
struct module *mod = __module_address(addr);
if (mod) {
/* Make sure it's within the text section. */
- if (!within(addr, mod->module_init, mod->init_text_size)
- && !within(addr, mod->module_core, mod->core_text_size))
+ if (!within(addr, mod->init_layout.base, mod->init_layout.text_size)
+ && !within(addr, mod->core_layout.base, mod->core_layout.text_size))
mod = NULL;
}
return mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index b333380c6bb2..d96469de72dc 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -180,8 +180,7 @@ void panic(const char *fmt, ...)
* panic() is not being callled from OOPS.
*/
debug_locks_off();
- console_trylock();
- console_unlock();
+ console_flush_on_panic();
if (!panic_blink)
panic_blink = no_blink;
diff --git a/kernel/pid.c b/kernel/pid.c
index 78b3d9f80d44..f4ad91b746f1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -604,5 +604,5 @@ void __init pidmap_init(void)
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 02e8dfaa1ce2..68d3ebc12601 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
config APM_EMULATION
tristate "Advanced Power Management Emulation"
- depends on PM && SYS_SUPPORTS_APM_EMULATION
+ depends on SYS_SUPPORTS_APM_EMULATION
help
APM is a BIOS specification for saving power using several different
techniques. This is mostly useful for battery powered laptops with
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2ce8826f1053..c963ba534a78 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -48,6 +48,7 @@
#include <linux/uio.h>
#include <asm/uaccess.h>
+#include <asm-generic/sections.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>
@@ -232,7 +233,11 @@ struct printk_log {
u8 facility; /* syslog facility */
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
-};
+}
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+__packed __aligned(4)
+#endif
+;
/*
* The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
@@ -273,11 +278,7 @@ static u32 clear_idx;
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
/* record buffer */
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-#define LOG_ALIGN 4
-#else
#define LOG_ALIGN __alignof__(struct printk_log)
-#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
@@ -1660,7 +1661,7 @@ asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
- static int recursion_bug;
+ static bool recursion_bug;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len = 0;
@@ -1696,7 +1697,7 @@ asmlinkage int vprintk_emit(int facility, int level,
* it can be printed at the next appropriate moment:
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
- recursion_bug = 1;
+ recursion_bug = true;
local_irq_restore(flags);
return 0;
}
@@ -1711,7 +1712,7 @@ asmlinkage int vprintk_emit(int facility, int level,
static const char recursion_msg[] =
"BUG: recent printk recursion!";
- recursion_bug = 0;
+ recursion_bug = false;
/* emit KERN_CRIT message */
printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
NULL, 0, recursion_msg,
@@ -2233,13 +2234,24 @@ void console_unlock(void)
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
- bool retry;
+ bool do_cond_resched, retry;
if (console_suspended) {
up_console_sem();
return;
}
+ /*
+ * Console drivers are called under logbuf_lock, so
+ * @console_may_schedule should be cleared before; however, we may
+ * end up dumping a lot of lines, for example, if called from
+ * console registration path, and should invoke cond_resched()
+ * between lines if allowable. Not doing so can cause a very long
+ * scheduling stall on a slow console leading to RCU stall and
+ * softlockup warnings which exacerbate the issue with more
+ * messages practically incapacitating the system.
+ */
+ do_cond_resched = console_may_schedule;
console_may_schedule = 0;
/* flush buffered message fragment immediately to console */
@@ -2311,6 +2323,9 @@ skip:
call_console_drivers(level, ext_text, ext_len, text, len);
start_critical_timings();
local_irq_restore(flags);
+
+ if (do_cond_resched)
+ cond_resched();
}
console_locked = 0;
@@ -2378,6 +2393,25 @@ void console_unblank(void)
console_unlock();
}
+/**
+ * console_flush_on_panic - flush console content on panic
+ *
+ * Immediately output all pending messages no matter what.
+ */
+void console_flush_on_panic(void)
+{
+ /*
+ * If someone else is holding the console lock, trylock will fail
+ * and may_schedule may be set. Ignore and proceed to unlock so
+ * that messages are flushed out. As this can be called from any
+ * context and we don't want to get preempted while flushing,
+ * ensure may_schedule is cleared.
+ */
+ console_trylock();
+ console_may_schedule = 0;
+ console_unlock();
+}
+
/*
* Return the console tty driver structure and its associated index
*/
@@ -2658,13 +2692,36 @@ int unregister_console(struct console *console)
}
EXPORT_SYMBOL(unregister_console);
+/*
+ * Some boot consoles access data that is in the init section and which will
+ * be discarded after the initcalls have been run. To make sure that no code
+ * will access this data, unregister the boot consoles in a late initcall.
+ *
+ * If for some reason, such as deferred probe or the driver being a loadable
+ * module, the real console hasn't registered yet at this point, there will
+ * be a brief interval in which no messages are logged to the console, which
+ * makes it difficult to diagnose problems that occur during this time.
+ *
+ * To mitigate this problem somewhat, only unregister consoles whose memory
+ * intersects with the init section. Note that code exists elsewhere to get
+ * rid of the boot console as soon as the proper console shows up, so there
+ * won't be side-effects from postponing the removal.
+ */
static int __init printk_late_init(void)
{
struct console *con;
for_each_console(con) {
if (!keep_bootcon && con->flags & CON_BOOT) {
- unregister_console(con);
+ /*
+ * Make sure to unregister boot consoles whose data
+ * resides in the init section before the init section
+ * is discarded. Boot consoles whose data will stick
+ * around will automatically be unregistered when the
+ * proper console replaces them.
+ */
+ if (init_section_intersects(con, sizeof(*con)))
+ unregister_console(con);
}
}
hotcpu_notifier(console_cpu_notify, 0);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index b760bae64cf1..2341efe7fe02 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -219,6 +219,14 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
+ int dumpable = 0;
+ kuid_t caller_uid;
+ kgid_t caller_gid;
+
+ if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) {
+ WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n");
+ return -EPERM;
+ }
/* May we inspect the given task?
* This check is used both for attaching with ptrace
@@ -228,18 +236,33 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
* because setting up the necessary parent/child relationship
* or halting the specified task is impossible.
*/
- int dumpable = 0;
+
/* Don't let security modules deny introspection */
if (same_thread_group(task, current))
return 0;
rcu_read_lock();
+ if (mode & PTRACE_MODE_FSCREDS) {
+ caller_uid = cred->fsuid;
+ caller_gid = cred->fsgid;
+ } else {
+ /*
+ * Using the euid would make more sense here, but something
+ * in userland might rely on the old behavior, and this
+ * shouldn't be a security problem since
+ * PTRACE_MODE_REALCREDS implies that the caller explicitly
+ * used a syscall that requests access to another process
+ * (and not a filesystem syscall to procfs).
+ */
+ caller_uid = cred->uid;
+ caller_gid = cred->gid;
+ }
tcred = __task_cred(task);
- if (uid_eq(cred->uid, tcred->euid) &&
- uid_eq(cred->uid, tcred->suid) &&
- uid_eq(cred->uid, tcred->uid) &&
- gid_eq(cred->gid, tcred->egid) &&
- gid_eq(cred->gid, tcred->sgid) &&
- gid_eq(cred->gid, tcred->gid))
+ if (uid_eq(caller_uid, tcred->euid) &&
+ uid_eq(caller_uid, tcred->suid) &&
+ uid_eq(caller_uid, tcred->uid) &&
+ gid_eq(caller_gid, tcred->egid) &&
+ gid_eq(caller_gid, tcred->sgid) &&
+ gid_eq(caller_gid, tcred->gid))
goto ok;
if (ptrace_has_cap(tcred->user_ns, mode))
goto ok;
@@ -306,7 +329,7 @@ static int ptrace_attach(struct task_struct *task, long request,
goto out;
task_lock(task);
- retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
+ retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
task_unlock(task);
if (retval)
goto unlock_creds;
@@ -364,8 +387,14 @@ unlock_creds:
mutex_unlock(&task->signal->cred_guard_mutex);
out:
if (!retval) {
- wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT,
- TASK_UNINTERRUPTIBLE);
+ /*
+ * We do not bother to change retval or clear JOBCTL_TRAPPING
+ * if wait_on_bit() was interrupted by SIGKILL. The tracer will
+ * not return to user-mode, it will exit and clear this bit in
+ * __ptrace_unlink() if it wasn't already cleared by the tracee;
+ * and until then nobody can ptrace this task.
+ */
+ wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
proc_ptrace_connector(task, PTRACE_ATTACH);
}
diff --git a/kernel/relay.c b/kernel/relay.c
index 0b4570cfacae..074994bcfa9b 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1133,7 +1133,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
if (!desc->count)
return 0;
- mutex_lock(&file_inode(filp)->i_mutex);
+ inode_lock(file_inode(filp));
do {
if (!relay_file_read_avail(buf, *ppos))
break;
@@ -1153,7 +1153,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
*ppos = relay_file_read_end_pos(buf, read_start, ret);
}
} while (desc->count && ret);
- mutex_unlock(&file_inode(filp)->i_mutex);
+ inode_unlock(file_inode(filp));
return desc->written;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44253adb3c36..63d3a24e081a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -222,9 +222,9 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i = sched_feat_set(cmp);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (i == __SCHED_FEAT_NR)
return -EINVAL;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 4a2ef5a02fd3..544a7133cbd1 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -97,12 +97,6 @@ void default_idle_call(void)
static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int next_state)
{
- /* Fall back to the default arch idle method on errors. */
- if (next_state < 0) {
- default_idle_call();
- return next_state;
- }
-
/*
* The idle task must be scheduled, it is pointless to go to idle, just
* update no idle residency and return.
@@ -168,7 +162,7 @@ static void cpuidle_idle_call(void)
*/
if (idle_should_freeze()) {
entered_state = cpuidle_enter_freeze(drv, dev);
- if (entered_state >= 0) {
+ if (entered_state > 0) {
local_irq_enable();
goto exit_idle;
}
@@ -219,6 +213,7 @@ static void cpu_idle_loop(void)
*/
__current_set_polling();
+ quiet_vmstat();
tick_nohz_idle_enter();
while (!need_resched()) {
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d4024f..15a1795bbba1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
put_seccomp_filter(thread);
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
+
+ /*
+ * Don't let an unprivileged task work around
+ * the no_new_privs restriction by creating
+ * a thread that sets it up, enters seccomp,
+ * then dies.
+ */
+ if (task_no_new_privs(caller))
+ task_set_no_new_privs(thread);
+
/*
* Opt the other thread into seccomp if needed.
* As threads are considered to be trust-realm
* equivalent (see ptrace_may_access), it is safe to
* allow one thread to transition the other.
*/
- if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
- /*
- * Don't let an unprivileged task work around
- * the no_new_privs restriction by creating
- * a thread that sets it up, enters seccomp,
- * then dies.
- */
- if (task_no_new_privs(caller))
- task_set_no_new_privs(thread);
-
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
- }
}
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index edb6de4f5908..a467e6c28a3b 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -529,8 +529,6 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
-
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
@@ -628,5 +626,3 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
mutex_unlock(&stop_cpus_mutex);
return ret ?: done.ret;
}
-
-#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/kernel/sys.c b/kernel/sys.c
index 6af9212ab5aa..78947de6f969 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1853,11 +1853,13 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
}
- if (prctl_map.exe_fd != (u32)-1)
+ if (prctl_map.exe_fd != (u32)-1) {
error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
- down_read(&mm->mmap_sem);
- if (error)
- goto out;
+ if (error)
+ return error;
+ }
+
+ down_write(&mm->mmap_sem);
/*
* We don't validate if these members are pointing to
@@ -1894,10 +1896,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
if (prctl_map.auxv_size)
memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
- error = 0;
-out:
- up_read(&mm->mmap_sem);
- return error;
+ up_write(&mm->mmap_sem);
+ return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
@@ -1963,7 +1963,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = -EINVAL;
- down_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
vma = find_vma(mm, addr);
prctl_map.start_code = mm->start_code;
@@ -2056,7 +2056,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = 0;
out:
- up_read(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
return error;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5faf89ac9ec0..97715fd9e790 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -173,7 +173,7 @@ extern int no_unaligned_warning;
#define SYSCTL_WRITES_WARN 0
#define SYSCTL_WRITES_STRICT 1
-static int sysctl_writes_strict = SYSCTL_WRITES_WARN;
+static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -1568,6 +1568,28 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+ {
+ .procname = "mmap_rnd_bits",
+ .data = &mmap_rnd_bits,
+ .maxlen = sizeof(mmap_rnd_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_bits_min,
+ .extra2 = (void *)&mmap_rnd_bits_max,
+ },
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ {
+ .procname = "mmap_rnd_compat_bits",
+ .data = &mmap_rnd_compat_bits,
+ .maxlen = sizeof(mmap_rnd_compat_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_compat_bits_min,
+ .extra2 = (void *)&mmap_rnd_compat_bits_max,
+ },
+#endif
{ }
};
@@ -1735,6 +1757,20 @@ static struct ctl_table fs_table[] = {
.proc_handler = &pipe_proc_fn,
.extra1 = &pipe_min_size,
},
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
{ }
};
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9cc20af58c76..9d7a053545f5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -387,7 +387,7 @@ void __init tick_nohz_init(void)
/*
* NO HZ enabled ?
*/
-static int tick_nohz_enabled __read_mostly = 1;
+int tick_nohz_enabled __read_mostly = 1;
unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb9801bd9e..d9293402ee68 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
{
__buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);