diff options
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 115 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 68 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 16 |
3 files changed, 122 insertions, 77 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 5f59d848171e..abbd4b3b96c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event, event->pmu->del(event, 0); event->oncpu = -1; - if (event->pending_disable) { - event->pending_disable = 0; + if (READ_ONCE(event->pending_disable) >= 0) { + WRITE_ONCE(event->pending_disable, -1); state = PERF_EVENT_STATE_OFF; } perf_event_set_state(event, state); @@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable); void perf_event_disable_inatomic(struct perf_event *event) { - event->pending_disable = 1; + WRITE_ONCE(event->pending_disable, smp_processor_id()); + /* can fail, see perf_pending_event_disable() */ irq_work_queue(&event->pending); } @@ -2477,6 +2478,16 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, perf_pmu_enable(cpuctx->ctx.pmu); } +void perf_pmu_resched(struct pmu *pmu) +{ + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + struct perf_event_context *task_ctx = cpuctx->task_ctx; + + perf_ctx_lock(cpuctx, task_ctx); + ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU); + perf_ctx_unlock(cpuctx, task_ctx); +} + /* * Cross CPU call to install and enable a performance event * @@ -4238,7 +4249,8 @@ static bool is_sb_event(struct perf_event *event) if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || attr->task || attr->ksymbol || - attr->context_switch) + attr->context_switch || + attr->bpf_event) return true; return false; } @@ -5473,7 +5485,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) /* now it's safe to free the pages */ atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); - vma->vm_mm->pinned_vm -= rb->aux_mmap_locked; + atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm); /* this has to be the last one */ rb_free_aux(rb); @@ -5546,7 +5558,7 @@ again: */ atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm); - vma->vm_mm->pinned_vm -= mmap_locked; + atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); free_uid(mmap_user); out_put: @@ -5694,7 +5706,7 @@ accounting: lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; - locked = vma->vm_mm->pinned_vm + extra; + locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra; if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && !capable(CAP_IPC_LOCK)) { @@ -5735,7 +5747,7 @@ accounting: unlock: if (!ret) { atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->pinned_vm += extra; + atomic64_add(extra, &vma->vm_mm->pinned_vm); atomic_inc(&event->mmap_count); } else if (rb) { @@ -5809,10 +5821,45 @@ void perf_event_wakeup(struct perf_event *event) } } +static void perf_pending_event_disable(struct perf_event *event) +{ + int cpu = READ_ONCE(event->pending_disable); + + if (cpu < 0) + return; + + if (cpu == smp_processor_id()) { + WRITE_ONCE(event->pending_disable, -1); + perf_event_disable_local(event); + return; + } + + /* + * CPU-A CPU-B + * + * perf_event_disable_inatomic() + * @pending_disable = CPU-A; + * irq_work_queue(); + * + * sched-out + * @pending_disable = -1; + * + * sched-in + * perf_event_disable_inatomic() + * @pending_disable = CPU-B; + * irq_work_queue(); // FAILS + * + * irq_work_run() + * perf_pending_event() + * + * But the event runs on CPU-B and wants disabling there. + */ + irq_work_queue_on(&event->pending, cpu); +} + static void perf_pending_event(struct irq_work *entry) { - struct perf_event *event = container_of(entry, - struct perf_event, pending); + struct perf_event *event = container_of(entry, struct perf_event, pending); int rctx; rctx = perf_swevent_get_recursion_context(); @@ -5821,10 +5868,7 @@ static void perf_pending_event(struct irq_work *entry) * and we won't recurse 'further'. */ - if (event->pending_disable) { - event->pending_disable = 0; - perf_event_disable_local(event); - } + perf_pending_event_disable(event); if (event->pending_wakeup) { event->pending_wakeup = 0; @@ -7188,6 +7232,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -7231,6 +7276,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) @@ -9041,26 +9087,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; - - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filter_ranges[count].start = 0; - event->addr_filter_ranges[count].size = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->path.dentry) perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -9068,9 +9117,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); @@ -9174,6 +9225,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, case IF_SRC_KERNELADDR: case IF_SRC_KERNEL: kernel = 1; + /* fall through */ case IF_SRC_FILEADDR: case IF_SRC_FILE: @@ -10232,6 +10284,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_waitqueue_head(&event->waitq); + event->pending_disable = -1; init_irq_work(&event->pending, perf_pending_event); mutex_init(&event->mmap_mutex); @@ -11874,7 +11927,7 @@ static void __init perf_event_init_all_cpus(void) } } -void perf_swevent_init_cpu(unsigned int cpu) +static void perf_swevent_init_cpu(unsigned int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 678ccec60d8f..674b35383491 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = 1; + event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); local_set(&rb->aux_nest, 0); goto err_put; @@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->aux_head += size; } - if (size || handle->aux_flags) { - /* - * Only send RECORD_AUX if we have something useful to communicate - * - * Note: the OVERWRITE records by themselves are not considered - * useful, as they don't communicate any *new* information, - * aside from the short-lived offset, that becomes history at - * the next event sched-in and therefore isn't useful. - * The userspace that needs to copy out AUX data in overwrite - * mode should know to use user_page::aux_head for the actual - * offset. So, from now on we don't output AUX records that - * have *only* OVERWRITE flag set. - */ - - if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) - perf_event_aux_event(handle->event, aux_head, size, - handle->aux_flags); - } + /* + * Only send RECORD_AUX if we have something useful to communicate + * + * Note: the OVERWRITE records by themselves are not considered + * useful, as they don't communicate any *new* information, + * aside from the short-lived offset, that becomes history at + * the next event sched-in and therefore isn't useful. + * The userspace that needs to copy out AUX data in overwrite + * mode should know to use user_page::aux_head for the actual + * offset. So, from now on we don't output AUX records that + * have *only* OVERWRITE flag set. + */ + if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)) + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); rb->user_page->aux_head = rb->aux_head; if (rb_need_aux_wakeup(rb)) @@ -480,7 +477,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = 1; + handle->event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); } @@ -598,29 +595,26 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, { bool overwrite = !(flags & RING_BUFFER_WRITABLE); int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); - int ret = -ENOMEM, max_order = 0; + int ret = -ENOMEM, max_order; if (!has_aux(event)) return -EOPNOTSUPP; - if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) { - /* - * We need to start with the max_order that fits in nr_pages, - * not the other way around, hence ilog2() and not get_order. - */ - max_order = ilog2(nr_pages); + /* + * We need to start with the max_order that fits in nr_pages, + * not the other way around, hence ilog2() and not get_order. + */ + max_order = ilog2(nr_pages); - /* - * PMU requests more than one contiguous chunks of memory - * for SW double buffering - */ - if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) && - !overwrite) { - if (!max_order) - return -EINVAL; + /* + * PMU requests more than one contiguous chunks of memory + * for SW double buffering + */ + if (!overwrite) { + if (!max_order) + return -EINVAL; - max_order--; - } + max_order--; } rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 48abc0f18eae..4ca7364c956d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -53,7 +53,7 @@ static struct percpu_rw_semaphore dup_mmap_sem; struct uprobe { struct rb_node rb_node; /* node in the rb tree */ - atomic_t ref; + refcount_t ref; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; @@ -547,13 +547,13 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v static struct uprobe *get_uprobe(struct uprobe *uprobe) { - atomic_inc(&uprobe->ref); + refcount_inc(&uprobe->ref); return uprobe; } static void put_uprobe(struct uprobe *uprobe) { - if (atomic_dec_and_test(&uprobe->ref)) { + if (refcount_dec_and_test(&uprobe->ref)) { /* * If application munmap(exec_vma) before uprobe_unregister() * gets called, we don't get a chance to remove uprobe from @@ -644,7 +644,7 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe) rb_link_node(&uprobe->rb_node, parent, p); rb_insert_color(&uprobe->rb_node, &uprobes_tree); /* get access + creation ref */ - atomic_set(&uprobe->ref, 2); + refcount_set(&uprobe->ref, 2); return u; } @@ -2294,16 +2294,14 @@ static struct notifier_block uprobe_exception_nb = { .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; -static int __init init_uprobes(void) +void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); - if (percpu_init_rwsem(&dup_mmap_sem)) - return -ENOMEM; + BUG_ON(percpu_init_rwsem(&dup_mmap_sem)); - return register_die_notifier(&uprobe_exception_nb); + BUG_ON(register_die_notifier(&uprobe_exception_nb)); } -__initcall(init_uprobes); |