From b9ce08c01020eb28bfbfa6faf1c740281c5f418e Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Sun, 10 Aug 2008 20:14:03 +0300 Subject: kmemtrace: Core implementation. kmemtrace provides tracing for slab allocator functions, such as kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected data is then fed to the userspace application in order to analyse allocation hotspots, internal fragmentation and so on, making it possible to see how well an allocator performs, as well as debug and profile kernel code. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- init/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index 7e117a231af1..be1fe2242a55 100644 --- a/init/main.c +++ b/init/main.c @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -653,6 +654,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); + kmemtrace_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); -- cgit v1.2.3-55-g7522 From 36994e58a48fb8f9651c7dc845a6de298aba5bfc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Dec 2008 13:42:23 -0800 Subject: tracing/kmemtrace: normalize the raw tracer event to the unified tracing API Impact: new tracer plugin This patch adapts kmemtrace raw events tracing to the unified tracing API. To enable and use this tracer, just do the following: echo kmemtrace > /debugfs/tracing/current_tracer cat /debugfs/tracing/trace You will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | type_id 1 call_site 18446744071565527833 ptr 18446612134395152256 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164672 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164912 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345165152 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 0 call_site 18446744071566144042 ptr 18446612134346191680 bytes_req 1304 bytes_alloc 1312 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 That was to stay backward compatible with the format output produced in inux/tracepoint.h. This is the default ouput, but note that I tried something else. If you change an option: echo kmem_minimalistic > /debugfs/trace_options and then cat /debugfs/trace, you will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | - C 0xffff88007c088780 file_free_rcu + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc780 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc870 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc960 -1 d_alloc + K 1304 1312 000000d0 0xffff8800791d7340 -1 reiserfs_alloc_inode - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 992 1000 000000d0 0xffff880079045b58 -1 alloc_inode + K 768 1024 000080d0 0xffff88007c096400 -1 alloc_pipe_info + K 240 240 000000d0 0xffff8800790dca50 -1 d_alloc + K 272 320 000080d0 0xffff88007c088780 -1 get_empty_filp + K 272 320 000080d0 0xffff88007c088000 -1 get_empty_filp Yeah I shall confess kmem_minimalistic should be: kmem_alternative. Whatever, I find it more readable but this a personal opinion of course. We can drop it if you want. On the ALLOC/FREE column, + means an allocation and - a free. On the type column, you have K = kmalloc, C = cache, P = page I would like the flags to be GFP_* strings but that would not be easy to not break the column with strings.... About the node...it seems to always be -1. I don't know why but that shouldn't be difficult to find. I moved linux/tracepoint.h to trace/tracepoint.h as well. I think that would be more easy to find the tracer headers if they are all in their common directory. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/kmemtrace.h | 86 ------------ include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/trace/kmemtrace.h | 75 ++++++++++ init/main.c | 2 +- kernel/trace/Kconfig | 22 +++ kernel/trace/Makefile | 1 + kernel/trace/kmemtrace.c | 343 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 25 ++++ lib/Kconfig.debug | 20 --- mm/kmemtrace.c | 2 +- mm/slob.c | 2 +- mm/slub.c | 2 +- 13 files changed, 472 insertions(+), 112 deletions(-) delete mode 100644 include/linux/kmemtrace.h create mode 100644 include/trace/kmemtrace.h create mode 100644 kernel/trace/kmemtrace.c (limited to 'init') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h deleted file mode 100644 index 5bea8ead6a6b..000000000000 --- a/include/linux/kmemtrace.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; - -#ifdef CONFIG_KMEMTRACE - -extern void kmemtrace_init(void); - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu " - "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d", - type_id, call_site, (unsigned long) ptr, - (unsigned long) bytes_req, (unsigned long) bytes_alloc, - (unsigned long) gfp_flags, node); -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ - trace_mark(kmemtrace_free, "type_id %d call_site %lu ptr %lu", - type_id, call_site, (unsigned long) ptr); -} - -#else /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_init(void) -{ -} - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ -} - -#endif /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_mark_alloc_node(type_id, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 7555ce99f6d2..455f9affea9a 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index dc28432b5b9a..6b657f7dcb2b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h new file mode 100644 index 000000000000..ad8b7857855a --- /dev/null +++ b/include/trace/kmemtrace.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include +#include + +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +#ifdef CONFIG_KMEMTRACE + +extern void kmemtrace_init(void); + +extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node); + +extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr); + +#else /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_init(void) +{ +} + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ +} + +#endif /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_mark_alloc_node(type_id, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/init/main.c b/init/main.c index 9711586aa7c9..beca7aaddb22 100644 --- a/init/main.c +++ b/init/main.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a6..27fb74b06b3c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -264,6 +264,28 @@ config HW_BRANCH_TRACER This tracer records all branches on the system in a circular buffer giving access to the last N branches for each cpu. +config KMEMTRACE + bool "Trace SLAB allocations" + select TRACING + depends on RELAY + help + kmemtrace provides tracing for slab allocator functions, such as + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + data is then fed to the userspace application in order to analyse + allocation hotspots, internal fragmentation and so on, making it + possible to see how well an allocator performs, as well as debug + and profile kernel code. + + This requires an userspace application to use. See + Documentation/vm/kmemtrace.txt for more information. + + Saying Y will make the kernel somewhat larger and slower. However, + if you disable kmemtrace at run-time or boot-time, the performance + impact is minimal (depending on the arch the kernel is built for). + + If unsure, say N. + + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 349d5a93653f..513dc86b5dfa 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o +obj-$(CONFIG_KMEMTRACE) += kmemtrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c new file mode 100644 index 000000000000..d69cbe3c2a4b --- /dev/null +++ b/kernel/trace/kmemtrace.c @@ -0,0 +1,343 @@ +/* + * Memory allocator tracing + * + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * Copyright (C) 2008 Pekka Enberg + * Copyright (C) 2008 Frederic Weisbecker + */ + +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_output.h" + +/* Select an alternative, minimalistic output than the original one */ +#define TRACE_KMEM_OPT_MINIMAL 0x1 + +static struct tracer_opt kmem_opts[] = { + /* Default disable the minimalistic output */ + { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, + { } +}; + +static struct tracer_flags kmem_tracer_flags = { + .val = 0, + .opts = kmem_opts +}; + + +static bool kmem_tracing_enabled __read_mostly; +static struct trace_array *kmemtrace_array; + +static int kmem_trace_init(struct trace_array *tr) +{ + int cpu; + kmemtrace_array = tr; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + + kmem_tracing_enabled = true; + + return 0; +} + +static void kmem_trace_reset(struct trace_array *tr) +{ + kmem_tracing_enabled = false; +} + +static void kmemtrace_headers(struct seq_file *s) +{ + /* Don't need headers for the original kmemtrace output */ + if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) + return; + + seq_printf(s, "#\n"); + seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS " + " POINTER NODE CALLER\n"); + seq_printf(s, "# FREE | | | | " + " | | | |\n"); + seq_printf(s, "# |\n\n"); +} + +/* + * The two following functions give the original output from kmemtrace, + * or something close to....perhaps they need some missing things + */ +static enum print_line_t +kmemtrace_print_alloc_original(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " + "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr, + (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, + (unsigned long) entry->gfp_flags, entry->node); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_original(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + + +/* The two other following provide a more minimalistic output */ +static enum print_line_t +kmemtrace_print_alloc_compress(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Alloc entry */ + ret = trace_seq_printf(s, " + "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? "); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Requested */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_req); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Allocated */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_alloc); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Flags + * TODO: would be better to see the name of the GFP flag names + */ + ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Node */ + ret = trace_seq_printf(s, "%4d ", entry->node); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_compress(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Free entry */ + ret = trace_seq_printf(s, " - "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? "); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip requested/allocated/flags */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip node */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_KMEM_ALLOC: { + struct kmemtrace_alloc_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_alloc_compress(iter, field); + else + return kmemtrace_print_alloc_original(iter, field); + } + + case TRACE_KMEM_FREE: { + struct kmemtrace_free_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_free_compress(iter, field); + else + return kmemtrace_print_free_original(iter, field); + } + + default: + return TRACE_TYPE_UNHANDLED; + } +} + +/* Trace allocations */ +void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + struct ring_buffer_event *event; + struct kmemtrace_alloc_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_ALLOC; + entry->call_site = call_site; + entry->ptr = ptr; + entry->bytes_req = bytes_req; + entry->bytes_alloc = bytes_alloc; + entry->gfp_flags = gfp_flags; + entry->node = node; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ + struct ring_buffer_event *event; + struct kmemtrace_free_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_FREE; + entry->type_id = type_id; + entry->call_site = call_site; + entry->ptr = ptr; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +static struct tracer kmem_tracer __read_mostly = { + .name = "kmemtrace", + .init = kmem_trace_init, + .reset = kmem_trace_reset, + .print_line = kmemtrace_print_line, + .print_header = kmemtrace_headers, + .flags = &kmem_tracer_flags +}; + +static int __init init_kmem_tracer(void) +{ + return register_tracer(&kmem_tracer); +} + +device_initcall(init_kmem_tracer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f864036..534505bb39b0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,6 +9,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -29,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_KMEM_ALLOC, + TRACE_KMEM_FREE, TRACE_POWER, __TRACE_LAST_TYPE @@ -170,6 +173,24 @@ struct trace_power { struct power_trace state_data; }; +struct kmemtrace_alloc_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t bytes_alloc; + gfp_t gfp_flags; + int node; +}; + +struct kmemtrace_free_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ + TRACE_KMEM_ALLOC); \ + IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ + TRACE_KMEM_FREE); \ __ftrace_bad_type(); \ } while (0) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b5417e23ba94..b0f239e443bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -803,26 +803,6 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. -config KMEMTRACE - bool "Kernel memory tracer (kmemtrace)" - depends on RELAY && DEBUG_FS && MARKERS - help - kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected - data is then fed to the userspace application in order to analyse - allocation hotspots, internal fragmentation and so on, making it - possible to see how well an allocator performs, as well as debug - and profile kernel code. - - This requires an userspace application to use. See - Documentation/vm/kmemtrace.txt for more information. - - Saying Y will make the kernel somewhat larger and slower. However, - if you disable kmemtrace at run-time or boot-time, the performance - impact is minimal (depending on the arch the kernel is built for). - - If unsure, say N. - menuconfig BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK diff --git a/mm/kmemtrace.c b/mm/kmemtrace.c index 2a70a805027c..0573b5080cc4 100644 --- a/mm/kmemtrace.c +++ b/mm/kmemtrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define KMEMTRACE_SUBBUF_SIZE 524288 #define KMEMTRACE_DEF_N_SUBBUFS 20 diff --git a/mm/slob.c b/mm/slob.c index 0f1a49f40690..4d1c0fc33b6b 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include /* diff --git a/mm/slub.c b/mm/slub.c index cc4001fee7ac..7bf8cf8ec082 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-55-g7522 From 91f73f90d97fa67effbb49e0a79c50cf26dfe324 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 20 Feb 2009 17:34:06 +0100 Subject: tracing/markers: make markers select tracepoints Sometimes it happens that KConfig dependencies are not handled like in the following scenario: - config A bool - config B bool depends on A - config C bool select B If one selects C, then it will select B without checking its dependency to A, if A hasn't been selected elsewhere, it will result in a build failure. This is what happens on the following build error: kernel/built-in.o: In function `marker_update_probe_range': (.text+0x52f64): undefined reference to `tracepoint_probe_register_noupdate' kernel/built-in.o: In function `marker_update_probe_range': (.text+0x52f74): undefined reference to `tracepoint_probe_unregister_noupdate' kernel/built-in.o: In function `marker_update_probe_range': (.text+0x52fb9): undefined reference to `tracepoint_probe_unregister_noupdate' kernel/built-in.o: In function `marker_update_probes': marker.c:(.text+0x530ba): undefined reference to `tracepoint_probe_update_all' CONFIG_KVM_TRACE will select CONFIG_MARKER, but the latter depends on CONFIG_TRACEPOINTS which will not be selected. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index f068071fcc5d..26b5bab6f6e8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -945,7 +945,7 @@ config TRACEPOINTS config MARKERS bool "Activate markers" - depends on TRACEPOINTS + select TRACEPOINTS help Place an empty function call at each marker site. Can be dynamically changed for a probe function. -- cgit v1.2.3-55-g7522 From 759ee0915dd713361e72facb78b66600b5712d65 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 25 Mar 2009 17:06:30 +0800 Subject: init,cpuset: fix initialize order Impact: cpuset_wq should be initialized after init_workqueues() When I read /debugfs/tracing/trace_stat/workqueues, I got this: # CPU INSERTED EXECUTED NAME # | | | | 0 0 0 cpuset 0 285 285 events/0 0 2 2 work_on_cpu/0 0 1115 1115 khelper 0 325 325 kblockd/0 0 0 0 kacpid 0 0 0 kacpi_notify 0 0 0 ata/0 0 0 0 ata_aux 0 0 0 ksuspend_usbd 0 0 0 aio/0 0 0 0 nfsiod 0 0 0 kpsmoused 0 0 0 kstriped 0 0 0 kondemand/0 0 1 1 hid_compat 0 0 0 rpciod/0 1 64 64 events/1 1 2 2 work_on_cpu/1 1 5 5 kblockd/1 1 0 0 ata/1 1 0 0 aio/1 1 0 0 kondemand/1 1 0 0 rpciod/1 I found "cpuset" is at the earliest. I found a create_singlethread_workqueue() is earlier than init_workqueues(): kernel_init() ->cpuset_init_smp() ->create_singlethread_workqueue() ->do_basic_setup() ->init_workqueues() I think it's better that create_singlethread_workqueue() is called after workqueue subsystem has been initialized. Signed-off-by: Lai Jiangshan Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Paul Menage Cc: miaoxie Cc: Li Zefan Cc: Andrew Morton LKML-Reference: <49C9F416.1050707@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- init/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 20d784ab5ef8..b0097d2b63ae 100644 --- a/init/main.c +++ b/init/main.c @@ -772,6 +772,7 @@ static void __init do_basic_setup(void) { rcu_init_sched(); /* needed by module_init stage. */ init_workqueues(); + cpuset_init_smp(); usermodehelper_init(); driver_init(); init_irq_proc(); @@ -865,8 +866,6 @@ static int __init kernel_init(void * unused) smp_init(); sched_init_smp(); - cpuset_init_smp(); - do_basic_setup(); /* -- cgit v1.2.3-55-g7522 From 1a2142afa5646ad5af44bbe1febaa5e0b7e71156 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Mar 2009 22:05:10 -0600 Subject: cpumask: remove dangerous CPU_MASK_ALL_PTR, &CPU_MASK_ALL Impact: cleanup (Thanks to Al Viro for reminding me of this, via Ingo) CPU_MASK_ALL is the (deprecated) "all bits set" cpumask, defined as so: #define CPU_MASK_ALL (cpumask_t) { { ... } } Taking the address of such a temporary is questionable at best, unfortunately 321a8e9d (cpumask: add CPU_MASK_ALL_PTR macro) added CPU_MASK_ALL_PTR: #define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) Which formalizes this practice. One day gcc could bite us over this usage (though we seem to have gotten away with it so far). So replace everywhere which used &CPU_MASK_ALL or CPU_MASK_ALL_PTR with the modern "cpu_all_mask" (a real const struct cpumask *). Signed-off-by: Rusty Russell Acked-by: Ingo Molnar Reported-by: Al Viro Cc: Mike Travis --- init/main.c | 2 +- kernel/kmod.c | 2 +- kernel/kthread.c | 4 ++-- mm/pdflush.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 6bf83afd654d..1ac7ec78e601 100644 --- a/init/main.c +++ b/init/main.c @@ -842,7 +842,7 @@ static int __init kernel_init(void * unused) /* * init can run on any cpu. */ - set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR); + set_cpus_allowed_ptr(current, cpu_all_mask); /* * Tell the world that we're going to be the grim * reaper of innocent orphaned children. diff --git a/kernel/kmod.c b/kernel/kmod.c index a27a5f64443d..f0c8f545180d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -167,7 +167,7 @@ static int ____call_usermodehelper(void *data) } /* We can run anywhere, unlike our parent keventd(). */ - set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR); + set_cpus_allowed_ptr(current, cpu_all_mask); /* * Our parent is keventd, which runs with elevated scheduling priority. diff --git a/kernel/kthread.c b/kernel/kthread.c index 4fbc456f393d..84bbadd4d021 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -110,7 +110,7 @@ static void create_kthread(struct kthread_create_info *create) */ sched_setscheduler(create->result, SCHED_NORMAL, ¶m); set_user_nice(create->result, KTHREAD_NICE_LEVEL); - set_cpus_allowed_ptr(create->result, CPU_MASK_ALL_PTR); + set_cpus_allowed_ptr(create->result, cpu_all_mask); } complete(&create->done); } @@ -240,7 +240,7 @@ int kthreadd(void *unused) set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_user_nice(tsk, KTHREAD_NICE_LEVEL); - set_cpus_allowed_ptr(tsk, CPU_MASK_ALL_PTR); + set_cpus_allowed_ptr(tsk, cpu_all_mask); current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; diff --git a/mm/pdflush.c b/mm/pdflush.c index 15de509b68fd..118905e3d788 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -191,7 +191,7 @@ static int pdflush(void *dummy) /* * Some configs put our parent kthread in a limited cpuset, - * which kthread() overrides, forcing cpus_allowed == CPU_MASK_ALL. + * which kthread() overrides, forcing cpus_allowed == cpu_all_mask. * Our needs are more modest - cut back to our cpusets cpus_allowed. * This is needed as pdflush's are dynamically created and destroyed. * The boottime pdflush's are easily placed w/o these 2 lines. -- cgit v1.2.3-55-g7522 From 2b17fa506c418e9fb02bbbc7f426d2bbb5b247a6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Mar 2009 22:05:12 -0600 Subject: cpumask: use set_cpu_active in init/main.c cpu_active_map is deprecated in favor of cpu_active_mask, which is const for safety: we use accessors now (set_cpu_active) is we really want to make a change. Signed-off-by: Rusty Russell --- init/main.c | 3 +-- kernel/cpu.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 1ac7ec78e601..d6b388fbffa6 100644 --- a/init/main.c +++ b/init/main.c @@ -407,8 +407,7 @@ static void __init smp_init(void) * Set up the current CPU as possible to migrate to. * The other ones will be done by cpu_up/cpu_down() */ - cpu = smp_processor_id(); - cpu_set(cpu, cpu_active_map); + set_cpu_active(smp_processor_id(), true); /* FIXME: This should be done in userspace --RR */ for_each_present_cpu(cpu) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 79e40f00dcb8..395b6974dc8d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -281,7 +281,7 @@ int __ref cpu_down(unsigned int cpu) goto out; } - cpu_clear(cpu, cpu_active_map); + set_cpu_active(cpu, false); /* * Make sure the all cpus did the reschedule and are not @@ -296,7 +296,7 @@ int __ref cpu_down(unsigned int cpu) err = _cpu_down(cpu, 0); if (cpu_online(cpu)) - cpu_set(cpu, cpu_active_map); + set_cpu_active(cpu, true); out: cpu_maps_update_done(); @@ -333,7 +333,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); - cpu_set(cpu, cpu_active_map); + set_cpu_active(cpu, true); /* Now call notifier in preparation. */ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); -- cgit v1.2.3-55-g7522 From 692105b8ac5bcd75dc65f6a8f10bdbd0f0f34dcf Mon Sep 17 00:00:00 2001 From: Matt LaPlante Date: Mon, 26 Jan 2009 11:12:25 +0100 Subject: trivial: fix typos/grammar errors in Kconfig texts Signed-off-by: Matt LaPlante Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- arch/arm/mach-omap1/Kconfig | 2 +- arch/avr32/Kconfig | 6 +++--- arch/blackfin/Kconfig | 6 +++--- arch/cris/Kconfig | 6 +++--- arch/cris/arch-v32/Kconfig | 2 +- arch/cris/arch-v32/drivers/Kconfig | 2 +- arch/cris/arch-v32/mach-fs/Kconfig | 2 +- arch/mips/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/platforms/Kconfig | 2 +- arch/powerpc/sysdev/bestcomm/Kconfig | 4 ++-- arch/sh/Kconfig | 4 ++-- arch/x86/Kconfig | 4 ++-- drivers/ata/Kconfig | 2 +- drivers/gpio/Kconfig | 6 +++--- drivers/hid/Kconfig | 2 +- drivers/input/Kconfig | 2 +- drivers/isdn/mISDN/Kconfig | 10 ++++++---- drivers/leds/Kconfig | 6 +++--- drivers/media/common/tuners/Kconfig | 2 +- drivers/media/dvb/frontends/Kconfig | 2 +- drivers/mfd/Kconfig | 2 +- drivers/misc/Kconfig | 6 +++--- drivers/mmc/host/Kconfig | 2 +- drivers/scsi/Kconfig | 4 ++-- drivers/serial/Kconfig | 2 +- drivers/staging/Kconfig | 4 ++-- drivers/staging/comedi/Kconfig | 4 ++-- drivers/staging/go7007/Kconfig | 4 ++-- drivers/staging/panel/Kconfig | 2 +- drivers/usb/gadget/Kconfig | 2 +- drivers/usb/serial/Kconfig | 4 ++-- drivers/uwb/Kconfig | 4 ++-- drivers/xen/Kconfig | 2 +- fs/ext4/Kconfig | 2 +- fs/ubifs/Kconfig | 4 ++-- init/Kconfig | 6 +++--- kernel/trace/Kconfig | 9 ++++----- net/Kconfig | 2 +- net/ipv6/Kconfig | 18 +++++++++--------- net/mac80211/Kconfig | 2 +- net/netfilter/Kconfig | 2 +- net/phonet/Kconfig | 2 +- net/sunrpc/Kconfig | 2 +- net/wimax/Kconfig | 2 +- sound/soc/blackfin/Kconfig | 2 +- 46 files changed, 86 insertions(+), 85 deletions(-) (limited to 'init') diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig index 3f325d3718a9..cd8de89c5fad 100644 --- a/arch/arm/mach-omap1/Kconfig +++ b/arch/arm/mach-omap1/Kconfig @@ -109,7 +109,7 @@ config MACH_OMAP_PALMZ71 help Support for the Palm Zire71 PDA. To boot the kernel, you'll need a PalmOS compatible bootloader; check out - http://hackndev.com/palm/z71 for more informations. + http://hackndev.com/palm/z71 for more information. Say Y here if you have such a PDA, say N otherwise. config MACH_OMAP_PALMTT diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 05fe3053dcae..414a8ad97f52 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -127,13 +127,13 @@ config BOARD_HAMMERHEAD select CPU_AT32AP7000 select USB_ARCH_HAS_HCD help - The Hammerhead platform is built around a AVR32 32-bit microcontroller from Atmel. + The Hammerhead platform is built around an AVR32 32-bit microcontroller from Atmel. It offers versatile peripherals, such as ethernet, usb device, usb host etc. - The board also incooperates a power supply and is a Power over Ethernet (PoE) Powered + The board also incorporates a power supply and is a Power over Ethernet (PoE) Powered Device (PD). - Additonally, a Cyclone III FPGA from Altera is integrated on the board. The FPGA is + Additionally, a Cyclone III FPGA from Altera is integrated on the board. The FPGA is mapped into the 32-bit AVR memory bus. The FPGA offers two DDR2 SDRAM interfaces, which will cover even the most exceptional need of memory bandwidth. Together with the onboard video decoder the board is ready for video processing. diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 0c1f86e3e44a..3640cdc38aac 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -777,7 +777,7 @@ config CACHELINE_ALIGNED_L1 default n if BF54x depends on !BF531 help - If enabled, cacheline_anligned data is linked + If enabled, cacheline_aligned data is linked into L1 data memory. (less latency) config SYSCALL_TAB_L1 @@ -957,7 +957,7 @@ config MPU memory they do not own. This comes at a performance penalty and is recommended only for debugging. -comment "Asynchonous Memory Configuration" +comment "Asynchronous Memory Configuration" menu "EBIU_AMGCTL Global Control" config C_AMCKEN @@ -989,7 +989,7 @@ config C_B3PEN default n choice - prompt"Enable Asynchonous Memory Banks" + prompt "Enable Asynchronous Memory Banks" default C_AMBEN_ALL config C_AMBEN diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 3462245fe9fb..7adac388a771 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -438,7 +438,7 @@ config ETRAX_SERIAL_PORT0_DMA1_IN help Enables the DMA1 input channel for ser0 (ttyS0). If you do not enable DMA, an interrupt for each character will be - used when receiveing data. + used when receiving data. Normally you want to use DMA, unless you use the DMA channel for something else. @@ -565,7 +565,7 @@ config ETRAX_SERIAL_PORT2_DMA7_IN help Enables the DMA7 input channel for ser2 (ttyS2). If you do not enable DMA, an interrupt for each character will be - used when receiveing data. + used when receiving data. Normally you want to use DMA, unless you use the DMA channel for something else. @@ -604,7 +604,7 @@ config ETRAX_SERIAL_PORT3_DMA3_IN help Enables the DMA3 input channel for ser3 (ttyS3). If you do not enable DMA, an interrupt for each character will be - used when receiveing data. + used when receiving data. Normally you want to use DMA, unless you use the DMA channel for something else. diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig index 005ed2b3f7f4..21bbd93be34f 100644 --- a/arch/cris/arch-v32/Kconfig +++ b/arch/cris/arch-v32/Kconfig @@ -28,7 +28,7 @@ config ETRAX_NBR_LED_GRP_ONE help Select this if you want one Ethernet LED group. This LED group can be used for one or more Ethernet interfaces. However, it is - recomended that each Ethernet interface use a dedicated LED group. + recommended that each Ethernet interface use a dedicated LED group. config ETRAX_NBR_LED_GRP_TWO bool "Use two LED groups" diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 7a64fcef9d07..b9e328e688be 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -342,7 +342,7 @@ config ETRAX_SERIAL_PORT4_DMA9_IN help Enables the DMA9 input channel for ser4 (ttyS4). If you do not enable DMA, an interrupt for each character will be - used when receiveing data. + used when receiving data. Normally you want to use DMA, unless you use the DMA channel for something else. diff --git a/arch/cris/arch-v32/mach-fs/Kconfig b/arch/cris/arch-v32/mach-fs/Kconfig index f6d74475f1c6..774de82abef6 100644 --- a/arch/cris/arch-v32/mach-fs/Kconfig +++ b/arch/cris/arch-v32/mach-fs/Kconfig @@ -59,7 +59,7 @@ config ETRAX_SDRAM_GRP1_CONFIG depends on ETRAX_ARCH_V32 default "0" help - SDRAM configuration for group 1. The defult value is 0 + SDRAM configuration for group 1. The default value is 0 because group 1 is not used in the default configuration, described in the help for SDRAM_GRP0_CONFIG. diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 206cb7953b0c..92508c521673 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -136,7 +136,7 @@ config MACH_JAZZ help This a family of machines based on the MIPS R4030 chipset which was used by several vendors to build RISC/os and Windows NT workstations. - Members include the Acer PICA, MIPS Magnum 4000, MIPS Millenium and + Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and Olivetti M700-10 workstations. config LASAT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 74cc312c347c..2e2160c3de7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -342,7 +342,7 @@ config PHYP_DUMP help Hypervisor-assisted dump is meant to be a kdump replacement offering robustness and speed not possible without system - hypervisor assistence. + hypervisor assistance. If unsure, say "N" diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 200b9cb900ea..9a8bb53d315d 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -298,7 +298,7 @@ config CPM config OF_RTC bool help - Uses information from the OF or flattened device tree to instatiate + Uses information from the OF or flattened device tree to instantiate platform devices for direct mapped RTC chips like the DS1742 or DS1743. source "arch/powerpc/sysdev/bestcomm/Kconfig" diff --git a/arch/powerpc/sysdev/bestcomm/Kconfig b/arch/powerpc/sysdev/bestcomm/Kconfig index 0b192a1c429d..29e427085efb 100644 --- a/arch/powerpc/sysdev/bestcomm/Kconfig +++ b/arch/powerpc/sysdev/bestcomm/Kconfig @@ -9,8 +9,8 @@ config PPC_BESTCOMM select PPC_LIB_RHEAP help BestComm is the name of the communication coprocessor found - on the Freescale MPC5200 family of processor. It's usage is - optionnal for some drivers (like ATA), but required for + on the Freescale MPC5200 family of processor. Its usage is + optional for some drivers (like ATA), but required for others (like FEC). If you want to use drivers that require DMA operations, diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8d50d527c595..2d52b515c241 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -640,10 +640,10 @@ config GUSA_RB depends on GUSA && CPU_SH3 || (CPU_SH4 && !CPU_SH4A) help Enabling this option will allow the kernel to implement some - atomic operations using a software implemention of load-locked/ + atomic operations using a software implementation of load-locked/ store-conditional (LLSC). On machines which do not have hardware LLSC, this should be more efficient than the other alternative of - disabling insterrupts around the atomic sequence. + disabling interrupts around the atomic sequence. endmenu diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 06c02c00d7d9..d7b5621382f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1129,7 +1129,7 @@ config NODES_SHIFT depends on NEED_MULTIPLE_NODES ---help--- Specify the maximum number of NUMA Nodes available on the target - system. Increases memory reserved to accomodate various tables. + system. Increases memory reserved to accommodate various tables. config HAVE_ARCH_BOOTMEM def_bool y @@ -1307,7 +1307,7 @@ config MTRR_SANITIZER add writeback entries. Can be disabled with disable_mtrr_cleanup on the kernel command line. - The largest mtrr entry size for a continous block can be set with + The largest mtrr entry size for a continuous block can be set with mtrr_chunk_size. If unsure, say Y. diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 0bcf26464670..9120717c0701 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -86,7 +86,7 @@ config ATA_SFF For users with exclusively modern controllers like AHCI, Silicon Image 3124, or Marvell 6440, you may choose to - disable this uneeded SFF support. + disable this unneeded SFF support. If unsure, say Y. diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3d2565441b36..edb02530e461 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -42,9 +42,9 @@ config DEBUG_GPIO depends on DEBUG_KERNEL help Say Y here to add some extra checks and diagnostics to GPIO calls. - The checks help ensure that GPIOs have been properly initialized - before they are used and that sleeping calls aren not made from - nonsleeping contexts. They can make bitbanged serial protocols + These checks help ensure that GPIOs have been properly initialized + before they are used, and that sleeping calls are not made from + non-sleeping contexts. They can make bitbanged serial protocols slower. The diagnostics help catch the type of setup errors that are most common when setting up new platforms or boards. diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index e85c8fe9ffcf..504cfaa6160f 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -243,7 +243,7 @@ config GREENASIA_FF select INPUT_FF_MEMLESS ---help--- Say Y here if you have a GreenAsia (Product ID 0x12) based game controller - (like MANTA Warior MM816 and SpeedLink Strike2 SL-6635) or adapter + (like MANTA Warrior MM816 and SpeedLink Strike2 SL-6635) or adapter and want to enable force feedback support for it. config HID_TOPSEED diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 5f9d860925a1..cd50c00ab20f 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -143,7 +143,7 @@ config INPUT_APMPOWER ---help--- Say Y here if you want suspend key events to trigger a user requested suspend through APM. This is useful on embedded - systems where such behviour is desired without userspace + systems where such behaviour is desired without userspace interaction. If unsure, say N. To compile this driver as a module, choose M here: the diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig index 4938355c4072..1747a02a019a 100644 --- a/drivers/isdn/mISDN/Kconfig +++ b/drivers/isdn/mISDN/Kconfig @@ -14,13 +14,15 @@ config MISDN_DSP depends on MISDN help Enable support for digital audio processing capability. + This module may be used for special applications that require - cross connecting of bchannels, conferencing, dtmf decoding + cross connecting of bchannels, conferencing, dtmf decoding, echo cancelation, tone generation, and Blowfish encryption and - decryption. - It may use hardware features if available. + decryption. It may use hardware features if available. + E.g. it is required for PBX4Linux. Go to http://isdn.eversberg.eu - and get more informations about this module and it's usage. + and get more information about this module and its usage. + If unsure, say 'N'. config MISDN_L1OIP diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 556aeca0d860..d9db17624f12 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -100,7 +100,7 @@ config LEDS_HP6XX tristate "LED Support for the HP Jornada 6xx" depends on LEDS_CLASS && SH_HP6XX help - This option enables led support for the handheld + This option enables LED support for the handheld HP Jornada 620/660/680/690. config LEDS_PCA9532 @@ -108,7 +108,7 @@ config LEDS_PCA9532 depends on LEDS_CLASS && I2C && INPUT && EXPERIMENTAL help This option enables support for NXP pca9532 - led controller. It is generally only usefull + LED controller. It is generally only useful as a platform driver config LEDS_GPIO @@ -144,7 +144,7 @@ config LEDS_CLEVO_MAIL Positivo Mobile (Clevo M5x0V) If your model is not listed here you can try the "nodetect" - module paramter. + module parameter. To compile this driver as a module, choose M here: the module will be called leds-clevo-mail. diff --git a/drivers/media/common/tuners/Kconfig b/drivers/media/common/tuners/Kconfig index 6f92beaa5ac8..da058c174049 100644 --- a/drivers/media/common/tuners/Kconfig +++ b/drivers/media/common/tuners/Kconfig @@ -147,7 +147,7 @@ config MEDIA_TUNER_XC5000 default m if DVB_FE_CUSTOMISE help A driver for the silicon tuner XC5000 from Xceive. - This device is only used inside a SiP called togther with a + This device is only used inside a SiP called together with a demodulator for now. config MEDIA_TUNER_MXL5005S diff --git a/drivers/media/dvb/frontends/Kconfig b/drivers/media/dvb/frontends/Kconfig index 00269560793a..baf808e9d6e1 100644 --- a/drivers/media/dvb/frontends/Kconfig +++ b/drivers/media/dvb/frontends/Kconfig @@ -439,7 +439,7 @@ config DVB_TUNER_DIB0070 default m if DVB_FE_CUSTOMISE help A driver for the silicon baseband tuner DiB0070 from DiBcom. - This device is only used inside a SiP called togther with a + This device is only used inside a SiP called together with a demodulator for now. comment "SEC control devices for DVB-S" diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 06a2b0f7737c..75f35dbb11dc 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -88,7 +88,7 @@ config MENELAUS help If you say yes here you get support for the Texas Instruments TWL92330/Menelaus Power Management chip. This include voltage - regulators, Dual slot memory card tranceivers, real-time clock + regulators, Dual slot memory card transceivers, real-time clock and other features that are often used in portable devices like cell phones and PDAs. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1c484084ed4f..7a1e948840e6 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -18,8 +18,8 @@ config ATMEL_PWM depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9 help This option enables device driver support for the PWM channels - on certain Atmel prcoessors. Pulse Width Modulation is used for - purposes including software controlled power-efficent backlights + on certain Atmel processors. Pulse Width Modulation is used for + purposes including software controlled power-efficient backlights on LCD displays, motor control, and waveform generation. config ATMEL_TCLIB @@ -142,7 +142,7 @@ config ATMEL_SSC tristate "Device driver for Atmel SSC peripheral" depends on AVR32 || ARCH_AT91 ---help--- - This option enables device driver support for Atmel Syncronized + This option enables device driver support for Atmel Synchronized Serial Communication peripheral (SSC). The SSC peripheral supports a wide variety of serial frame based diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 99d4b28d52ed..6fbb246c40bb 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -177,7 +177,7 @@ config MMC_SPI select CRC7 select CRC_ITU_T help - Some systems accss MMC/SD/SDIO cards using a SPI controller + Some systems access MMC/SD/SDIO cards using a SPI controller instead of using a "native" MMC/SD/SDIO controller. This has a disadvantage of being relatively high overhead, but a compensating advantage of working on many systems without dedicated MMC/SD/SDIO diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index e2f44e6c0bcb..20297c521e50 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1380,7 +1380,7 @@ config SCSI_LPFC_DEBUG_FS bool "Emulex LightPulse Fibre Channel debugfs Support" depends on SCSI_LPFC && DEBUG_FS help - This makes debugging infomation from the lpfc driver + This makes debugging information from the lpfc driver available via the debugfs filesystem. config SCSI_SIM710 @@ -1388,7 +1388,7 @@ config SCSI_SIM710 depends on (EISA || MCA) && SCSI select SCSI_SPI_ATTRS ---help--- - This driver for NCR53c710 based SCSI host adapters. + This driver is for NCR53c710 based SCSI host adapters. It currently supports Compaq EISA cards and NCR MCA cards diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 9be11b0963f2..aa9d3a4c2d50 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1374,7 +1374,7 @@ config SERIAL_BFIN_SPORT depends on BLACKFIN && EXPERIMENTAL select SERIAL_CORE help - Enble support SPORT emulate UART on Blackfin series. + Enable SPORT emulate UART on Blackfin series. To compile this driver as a module, choose M here: the module will be called bfin_sport_uart. diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 211af86a6c55..92981c2383ee 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -4,7 +4,7 @@ menuconfig STAGING ---help--- This option allows you to select a number of drivers that are not of the "normal" Linux kernel quality level. These drivers - are placed here in order to get a wider audience for use of + are placed here in order to get a wider audience to make use of them. Please note that these drivers are under heavy development, may or may not work, and may contain userspace interfaces that most likely will be changed in the near @@ -12,7 +12,7 @@ menuconfig STAGING Using any of these drivers will taint your kernel which might affect support options from both the community, and various - commercial support orginizations. + commercial support organizations. If you wish to work on these drivers, to help improve them, or to report problems you have with them, please see the diff --git a/drivers/staging/comedi/Kconfig b/drivers/staging/comedi/Kconfig index b47ca1e7e383..83a93a5c6392 100644 --- a/drivers/staging/comedi/Kconfig +++ b/drivers/staging/comedi/Kconfig @@ -1,9 +1,9 @@ config COMEDI - tristate "Data Acquision support (comedi)" + tristate "Data acquisition support (comedi)" default N depends on m ---help--- - Enable support a wide range of data acquision devices + Enable support a wide range of data acquisition devices for Linux. config COMEDI_RT diff --git a/drivers/staging/go7007/Kconfig b/drivers/staging/go7007/Kconfig index f2cf7f66ae05..ca6ade6c4b47 100644 --- a/drivers/staging/go7007/Kconfig +++ b/drivers/staging/go7007/Kconfig @@ -10,7 +10,7 @@ config VIDEO_GO7007 select CRC32 default N ---help--- - This is a video4linux driver for some wierd device... + This is a video4linux driver for some weird device... To compile this driver as a module, choose M here: the module will be called go7007 @@ -20,7 +20,7 @@ config VIDEO_GO7007_USB depends on VIDEO_GO7007 && USB default N ---help--- - This is a video4linux driver for some wierd device... + This is a video4linux driver for some weird device... To compile this driver as a module, choose M here: the module will be called go7007-usb diff --git a/drivers/staging/panel/Kconfig b/drivers/staging/panel/Kconfig index c4b30f2a549b..3abe7c9d558d 100644 --- a/drivers/staging/panel/Kconfig +++ b/drivers/staging/panel/Kconfig @@ -110,7 +110,7 @@ config PANEL_LCD_BWIDTH ---help--- Most LCDs use a standard controller which supports hardware lines of 40 characters, although sometimes only 16, 20 or 24 of them are really wired - to the terminal. This results in some non-visible but adressable characters, + to the terminal. This results in some non-visible but addressable characters, and is the case for most parallel LCDs. Other LCDs, and some serial ones, however, use the same line width internally as what is visible. The KS0074 for example, uses 16 characters per line for 16 visible characters per line. diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 770b3eaa9184..080bb1e4b847 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -392,7 +392,7 @@ config USB_GADGET_FSL_QE controllers having QE or CPM2, given minor tweaks. Set CONFIG_USB_GADGET to "m" to build this driver as a - dynmically linked module called "fsl_qe_udc". + dynamically linked module called "fsl_qe_udc". config USB_FSL_QE tristate diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index a65f9196b0a0..c480ea4c19f2 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -518,8 +518,8 @@ config USB_SERIAL_SIERRAWIRELESS help Say M here if you want to use Sierra Wireless devices. - Many deviecs have a feature known as TRU-Install, for those devices - to work properly the USB Storage Sierra feature must be enabled. + Many devices have a feature known as TRU-Install. For those devices + to work properly, the USB Storage Sierra feature must be enabled. To compile this driver as a module, choose M here: the module will be called sierra. diff --git a/drivers/uwb/Kconfig b/drivers/uwb/Kconfig index ca783127af36..bac8e7a6f17b 100644 --- a/drivers/uwb/Kconfig +++ b/drivers/uwb/Kconfig @@ -48,10 +48,10 @@ config UWB_WHCI help This driver enables the radio controller for WHCI cards. - WHCI is an specification developed by Intel + WHCI is a specification developed by Intel (http://www.intel.com/technology/comms/wusb/whci.htm) much in the spirit of USB's EHCI, but for UWB and Wireless USB - radio/host controllers connected via memmory mapping (eg: + radio/host controllers connected via memory mapping (eg: PCI). Most of these cards come also with a Wireless USB host controller. diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 526187c8a12d..8ac9cddac575 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,7 +37,7 @@ config XEN_COMPAT_XENFS The old xenstore userspace tools expect to find "xenbus" under /proc/xen, but "xenbus" is now found at the root of the xenfs filesystem. Selecting this causes the kernel to create - the compatibilty mount point /proc/xen if it is running on + the compatibility mount point /proc/xen if it is running on a xen platform. If in doubt, say yes. diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 7505482a08fa..418b6f3b0ae8 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -18,7 +18,7 @@ config EXT4_FS filesystem; while there will be some performance gains from the delayed allocation and inode table readahead, the best performance gains will require enabling ext4 features in the - filesystem, or formating a new filesystem as an ext4 + filesystem, or formatting a new filesystem as an ext4 filesystem initially. To compile this file system support as a module, choose M here. The diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index e35b54d5059d..830e3f76f442 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -22,7 +22,7 @@ config UBIFS_FS_ADVANCED_COMPR depends on UBIFS_FS help This option allows to explicitly choose which compressions, if any, - are enabled in UBIFS. Removing compressors means inbility to read + are enabled in UBIFS. Removing compressors means inability to read existing file systems. If unsure, say 'N'. @@ -32,7 +32,7 @@ config UBIFS_FS_LZO depends on UBIFS_FS default y help - LZO compressor is generally faster then zlib but compresses worse. + LZO compressor is generally faster than zlib but compresses worse. Say 'Y' if unsure. config UBIFS_FS_ZLIB diff --git a/init/Kconfig b/init/Kconfig index 14c483d2b7c9..bcffc0e47647 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -687,7 +687,7 @@ config PID_NS depends on NAMESPACES && EXPERIMENTAL help Support process id namespaces. This allows having multiple - process with the same pid as long as they are in different + processes with the same pid as long as they are in different pid namespaces. This is a building block of containers. Unless you want to work with an experimental feature @@ -952,7 +952,7 @@ config COMPAT_BRK Randomizing heap placement makes heap exploits harder, but it also breaks ancient binaries (including anything libc5 based). This option changes the bootup default to heap randomization - disabled, and can be overriden runtime by setting + disabled, and can be overridden at runtime by setting /proc/sys/kernel/randomize_va_space to 2. On non-ancient distros (post-2000 ones) N is usually a safe choice. @@ -1110,7 +1110,7 @@ config INIT_ALL_POSSIBLE cpu_possible_map, some of them chose to initialize cpu_possible_map with all 1s, and others with all 0s. When they were centralised, it was better to provide this option than to break all the archs - and have several arch maintainers persuing me down dark alleys. + and have several arch maintainers pursuing me down dark alleys. config STOP_MACHINE bool diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 34e707e5ab87..504086ab4443 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -72,11 +72,10 @@ config FUNCTION_GRAPH_TRACER help Enable the kernel to trace a function at both its return and its entry. - It's first purpose is to trace the duration of functions and - draw a call graph for each thread with some informations like - the return value. - This is done by setting the current return address on the current - task structure into a stack of calls. + Its first purpose is to trace the duration of functions and + draw a call graph for each thread with some information like + the return value. This is done by setting the current return + address on the current task structure into a stack of calls. config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" diff --git a/net/Kconfig b/net/Kconfig index ec93e7e38b38..ce77db4fcec8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,7 +140,7 @@ config NETFILTER_ADVANCED default y help If you say Y here you can select between all the netfilter modules. - If you say N the more ununsual ones will not be shown and the + If you say N the more unusual ones will not be shown and the basic ones needed by most people will default to 'M'. If unsure, say Y. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ec992159b5f8..ca8cb326d1d2 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -22,17 +22,17 @@ menuconfig IPV6 if IPV6 config IPV6_PRIVACY - bool "IPv6: Privacy Extensions support" + bool "IPv6: Privacy Extensions (RFC 3041) support" ---help--- Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-alter - pseudo-random global-scope unicast address(es) will assigned to + support. With this option, additional periodically-altered + pseudo-random global-scope unicast address(es) will be assigned to your interface(s). - We use our standard pseudo random algorithm to generate randomized - interface identifier, instead of one described in RFC 3041. + We use our standard pseudo-random algorithm to generate the + randomized interface identifier, instead of one described in RFC 3041. - By default, kernel do not generate temporary addresses. + By default the kernel does not generate temporary addresses. To use temporary addresses, do echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr @@ -43,9 +43,9 @@ config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- Router Preference is an optional extension to the Router - Advertisement message to improve the ability of hosts - to pick more appropriate router, especially when the hosts - is placed in a multi-homed network. + Advertisement message which improves the ability of hosts + to pick an appropriate router, especially when the hosts + are placed in a multi-homed network. If unsure, say N. diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 60c16162474c..f3d9ae350fb6 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,7 +33,7 @@ choice ---help--- This option selects the default rate control algorithm mac80211 will use. Note that this default can still be - overriden through the ieee80211_default_rc_algo module + overridden through the ieee80211_default_rc_algo module parameter if different algorithms are available. config MAC80211_RC_DEFAULT_PID diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2c967e4f706c..bb279bf59a1b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -52,7 +52,7 @@ config NF_CT_ACCT Please note that currently this option only sets a default state. You may change it at boot time with nf_conntrack.acct=0/1 kernel - paramater or by loading the nf_conntrack module with acct=0/1. + parameter or by loading the nf_conntrack module with acct=0/1. You may also disable/enable it on a running system with: sysctl net.netfilter.nf_conntrack_acct=0/1 diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 51a5669573f2..6ec7d55b1769 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -6,7 +6,7 @@ config PHONET tristate "Phonet protocols family" help The Phone Network protocol (PhoNet) is a packet-oriented - communication protocol developped by Nokia for use with its modems. + communication protocol developed by Nokia for use with its modems. This is required for Maemo to use cellular data connectivity (if supported). It can also be used to control Nokia phones diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 5592883e1e4a..3f7faa9688ae 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -69,7 +69,7 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_CBC help Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechansim (RFC 2025). + GSS-API mechanism (RFC 2025). Secure RPC calls with SPKM3 require an auxiliary userspace daemon which may be found in the Linux nfs-utils package diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig index 18495cdcd10d..1b46747a5f5a 100644 --- a/net/wimax/Kconfig +++ b/net/wimax/Kconfig @@ -8,7 +8,7 @@ # # As well, enablement of the RFKILL code means we need the INPUT layer # support to inject events coming from hw rfkill switches. That -# dependency could be killed if input.h provided appropiate means to +# dependency could be killed if input.h provided appropriate means to # work when input is disabled. comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" diff --git a/sound/soc/blackfin/Kconfig b/sound/soc/blackfin/Kconfig index 0a2f8f9eff53..811596f4c092 100644 --- a/sound/soc/blackfin/Kconfig +++ b/sound/soc/blackfin/Kconfig @@ -42,7 +42,7 @@ config SND_BF5XX_AC97 You will also need to select the audio interfaces to support below. Note: - AC97 codecs which do not implment the slot-16 mode will not function + AC97 codecs which do not implement the slot-16 mode will not function properly with this driver. This driver is known to work with the Analog Devices line of AC97 codecs. -- cgit v1.2.3-55-g7522 From 21acb9caa2e30b100e9a1943d995bb99d40f4035 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 4 Feb 2009 10:12:08 +0100 Subject: trivial: fix where cgroup documentation is not correctly referred to cgroup documentation was moved to Documentation/cgroups/. There are some places that still refer to Documentation/controllers/, Documentation/cgroups.txt and Documentation/cpusets.txt. Fix those. Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Jiri Kosina --- Documentation/00-INDEX | 4 ++-- Documentation/cgroups/00-INDEX | 18 ++++++++++++++++++ Documentation/kernel-parameters.txt | 4 ++-- Documentation/scheduler/sched-rt-group.txt | 2 +- Documentation/vm/numa_memory_policy.txt | 3 ++- Documentation/vm/page_migration | 3 ++- Documentation/x86/x86_64/fake-numa-for-cpusets | 5 +++-- include/linux/cgroup.h | 5 ++++- init/Kconfig | 2 +- 9 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 Documentation/cgroups/00-INDEX (limited to 'init') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 2a39aeba1464..d05737aaa84b 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -86,6 +86,8 @@ cachetlb.txt - describes the cache/TLB flushing interfaces Linux uses. cdrom/ - directory with information on the CD-ROM drivers that Linux has. +cgroups/ + - cgroups features, including cpusets and memory controller. connector/ - docs on the netlink based userspace<->kernel space communication mod. console/ @@ -98,8 +100,6 @@ cpu-load.txt - document describing how CPU load statistics are collected. cpuidle/ - info on CPU_IDLE, CPU idle state management subsystem. -cpusets.txt - - documents the cpusets feature; assign CPUs and Mem to a set of tasks. cputopology.txt - documentation on how CPU topology info is exported via sysfs. cris/ diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX new file mode 100644 index 000000000000..3f58fa3d6d00 --- /dev/null +++ b/Documentation/cgroups/00-INDEX @@ -0,0 +1,18 @@ +00-INDEX + - this file +cgroups.txt + - Control Groups definition, implementation details, examples and API. +cpuacct.txt + - CPU Accounting Controller; account CPU usage for groups of tasks. +cpusets.txt + - documents the cpusets feature; assign CPUs and Mem to a set of tasks. +devices.txt + - Device Whitelist Controller; description, interface and security. +freezer-subsystem.txt + - checkpointing; rationale to not use signals, interface. +memcg_test.txt + - Memory Resource Controller; implementation details. +memory.txt + - Memory Resource Controller; design, accounting, interface, testing. +resource_counter.txt + - Resource Counter API. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index be3bde51b564..755def2cb071 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1593,7 +1593,7 @@ and is between 256 and 4096 characters. It is defined in the file nosoftlockup [KNL] Disable the soft-lockup detector. noswapaccount [KNL] Disable accounting of swap in memory resource - controller. (See Documentation/controllers/memory.txt) + controller. (See Documentation/cgroups/memory.txt) nosync [HW,M68K] Disables sync negotiation for all devices. @@ -1932,7 +1932,7 @@ and is between 256 and 4096 characters. It is defined in the file relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. - See Documentation/cpusets.txt. + See Documentation/cgroups/cpusets.txt. reserve= [KNL,BUGS] Force the kernel to ignore some iomem area diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 3ef339f491e0..5ba4d3fc625a 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt @@ -126,7 +126,7 @@ This uses the /cgroup virtual file system and "/cgroup//cpu.rt_runtime_u to control the CPU time reserved for each control group instead. For more information on working with control groups, you should read -Documentation/cgroups.txt as well. +Documentation/cgroups/cgroups.txt as well. Group settings are checked against the following limits in order to keep the configuration schedulable: diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt index 6aaaeb38730c..be45dbb9d7f2 100644 --- a/Documentation/vm/numa_memory_policy.txt +++ b/Documentation/vm/numa_memory_policy.txt @@ -8,7 +8,8 @@ The current memory policy support was added to Linux 2.6 around May 2004. This document attempts to describe the concepts and APIs of the 2.6 memory policy support. -Memory policies should not be confused with cpusets (Documentation/cpusets.txt) +Memory policies should not be confused with cpusets +(Documentation/cgroups/cpusets.txt) which is an administrative mechanism for restricting the nodes from which memory may be allocated by a set of processes. Memory policies are a programming interface that a NUMA-aware application can take advantage of. When diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration index d5fdfd34bbaf..6513fe2d90b8 100644 --- a/Documentation/vm/page_migration +++ b/Documentation/vm/page_migration @@ -37,7 +37,8 @@ locations. Larger installations usually partition the system using cpusets into sections of nodes. Paul Jackson has equipped cpusets with the ability to -move pages when a task is moved to another cpuset (See ../cpusets.txt). +move pages when a task is moved to another cpuset (See +Documentation/cgroups/cpusets.txt). Cpusets allows the automation of process locality. If a task is moved to a new cpuset then also all its pages are moved with it so that the performance of the process does not sink dramatically. Also the pages diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets index 33bb56655991..0f11d9becb0b 100644 --- a/Documentation/x86/x86_64/fake-numa-for-cpusets +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets @@ -7,7 +7,8 @@ you can create fake NUMA nodes that represent contiguous chunks of memory and assign them to cpusets and their attached tasks. This is a way of limiting the amount of system memory that are available to a certain class of tasks. -For more information on the features of cpusets, see Documentation/cpusets.txt. +For more information on the features of cpusets, see +Documentation/cgroups/cpusets.txt. There are a number of different configurations you can use for your needs. For more information on the numa=fake command line option and its various ways of configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. @@ -32,7 +33,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: On node 3 totalpages: 131072 Now following the instructions for mounting the cpusets filesystem from -Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory +Documentation/cgroups/cpusets.txt, you can assign fake nodes (i.e. contiguous memory address spaces) to individual cpusets: [root@xroads /]# mkdir exampleset diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 499900d0cee7..b837631fe499 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -342,7 +342,10 @@ int cgroup_task_count(const struct cgroup *cgrp); /* Return true if the cgroup is a descendant of the current cgroup */ int cgroup_is_descendant(const struct cgroup *cgrp); -/* Control Group subsystem type. See Documentation/cgroups.txt for details */ +/* + * Control Group subsystem type. + * See Documentation/cgroups/cgroups.txt for details + */ struct cgroup_subsys { struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, diff --git a/init/Kconfig b/init/Kconfig index bcffc0e47647..99eb4196bd0a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -565,7 +565,7 @@ config CGROUP_MEM_RES_CTLR select MM_OWNER help Provides a memory resource controller that manages both anonymous - memory and page cache. (See Documentation/controllers/memory.txt) + memory and page cache. (See Documentation/cgroups/memory.txt) Note that setting this option increases fixed memory overhead associated with each page of memory in the system. By this, -- cgit v1.2.3-55-g7522 From 8b2b5c217c20b5460218ab8731295f2e46c7dd29 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 31 Mar 2009 14:27:03 +1100 Subject: md: move LEVEL_* definition from md_k.h to md_u.h .. as they are part of the user-space interface. Also move MdpMinorShift into there so we can remove duplication. Lastly move mdp_major in. It is less obviously part of the user-space interface, but do_mounts_md.c uses it, and it is acting a bit like user-space. Signed-off-by: NeilBrown --- drivers/md/md.c | 3 --- include/linux/raid/md.h | 2 -- include/linux/raid/md_k.h | 10 ---------- include/linux/raid/md_u.h | 17 +++++++++++++++++ init/do_mounts_md.c | 2 -- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'init') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9a3214c8585f..96336b050b59 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -46,9 +46,6 @@ #include #include "bitmap.h" -/* 63 partitions with the alternate major number (mdp) */ -#define MdpMinorShift 6 - #define DEBUG 0 #define dprintk(x...) ((void)(DEBUG && printk(x))) diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 82bea14cae1a..8bfaf6b1d309 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -52,8 +52,6 @@ */ #define MD_PATCHLEVEL_VERSION 3 -extern int mdp_major; - extern int register_md_personality(struct mdk_personality *p); extern int unregister_md_personality(struct mdk_personality *p); extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 4aedb9fe2bd8..758ec2842d9a 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -20,16 +20,6 @@ #ifdef CONFIG_BLOCK -#define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) -#define LEVEL_FAULTY (-5) - -/* we need a value for 'no level specified' and 0 - * means 'raid0', so we need something else. This is - * for internal use only - */ -#define LEVEL_NONE (-1000000) - #define MaxSector (~(sector_t)0) typedef struct mddev_s mddev_t; diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index 7192035fc4b0..2f824aa889f3 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -46,6 +46,12 @@ #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +/* 63 partitions with the alternate major number (mdp) */ +#define MdpMinorShift 6 +#ifdef __KERNEL__ +extern int mdp_major; +#endif + typedef struct mdu_version_s { int major; int minor; @@ -85,6 +91,17 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; +/* non-obvious values for 'level' */ +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + typedef struct mdu_disk_info_s { /* * configuration/status of one particular disk diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 9bdddbcb3d6a..23a15fb57e15 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -112,8 +112,6 @@ static int __init md_setup(char *str) return 1; } -#define MdpMinorShift 6 - static void __init md_setup_drive(void) { int minor, i, ent, partitioned; -- cgit v1.2.3-55-g7522 From bff61975b3d6c18ee31457cc5b4d73042f44915f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 31 Mar 2009 14:33:13 +1100 Subject: md: move lots of #include lines out of .h files and into .c This makes the includes more explicit, and is preparation for moving md_k.h to drivers/md/md.h Remove include/raid/md.h as its only remaining use was to #include other files. Signed-off-by: NeilBrown --- crypto/xor.c | 2 +- drivers/md/bitmap.c | 3 ++- drivers/md/faulty.c | 5 ++++- drivers/md/linear.c | 4 ++++ drivers/md/linear.h | 2 -- drivers/md/md.c | 6 +++++- drivers/md/multipath.c | 4 ++++ drivers/md/multipath.h | 2 -- drivers/md/raid0.c | 3 +++ drivers/md/raid0.h | 2 -- drivers/md/raid1.c | 5 ++++- drivers/md/raid1.h | 2 -- drivers/md/raid10.c | 5 ++++- drivers/md/raid10.h | 2 -- drivers/md/raid5.c | 8 ++++++-- drivers/md/raid5.h | 1 - drivers/md/raid6.h | 6 +----- fs/compat_ioctl.c | 2 +- include/linux/raid/md.h | 39 --------------------------------------- include/linux/raid/md_k.h | 3 --- include/linux/raid/xor.h | 2 -- init/do_mounts.h | 1 + init/do_mounts_md.c | 3 ++- 23 files changed, 42 insertions(+), 70 deletions(-) delete mode 100644 include/linux/raid/md.h (limited to 'init') diff --git a/crypto/xor.c b/crypto/xor.c index b2e6db075e49..996b6ee57d9e 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -18,8 +18,8 @@ #define BH_TRACE 0 #include -#include #include +#include #include /* The xor routines to use. */ diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7666117738c7..1df012e9d73d 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -16,6 +16,7 @@ * wait if count gets too high, wake when it drops to half. */ +#include #include #include #include @@ -26,7 +27,7 @@ #include #include #include -#include +#include #include "bitmap.h" /* debug macros */ diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 86d9adf90e79..cc5d2cf08dfc 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -62,7 +62,10 @@ #define ModeShift 5 #define MaxFault 50 -#include +#include +#include +#include +#include static void faulty_fail(struct bio *bio, int error) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3603ffa9edc5..c43c3b60ef09 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -16,6 +16,10 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include +#include +#include +#include #include "linear.h" /* diff --git a/drivers/md/linear.h b/drivers/md/linear.h index f38b9c586afb..bf8179587f95 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -1,8 +1,6 @@ #ifndef _LINEAR_H #define _LINEAR_H -#include - struct dev_info { mdk_rdev_t *rdev; sector_t num_sectors; diff --git a/drivers/md/md.c b/drivers/md/md.c index 96336b050b59..11d6e0e1045a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -33,8 +33,9 @@ */ #include -#include +#include #include +#include #include /* for invalidate_bdev */ #include #include @@ -44,6 +45,9 @@ #include #include #include +#include +#include +#include #include "bitmap.h" #define DEBUG 0 diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 547df09a7af3..148b3cd058bf 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -19,6 +19,10 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include +#include +#include +#include #include "multipath.h" #define MAX_WORK_PER_DISK 128 diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index 6f53fc177a47..6fa70b400cda 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h @@ -1,8 +1,6 @@ #ifndef _MULTIPATH_H #define _MULTIPATH_H -#include - struct multipath_info { mdk_rdev_t *rdev; }; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ef09ed04864e..64e4c77a1568 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -18,6 +18,9 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include +#include +#include #include "raid0.h" static void raid0_unplug(struct request_queue *q) diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index fd42aa87c391..824b12eb1d4f 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -1,8 +1,6 @@ #ifndef _RAID0_H #define _RAID0_H -#include - struct strip_zone { sector_t zone_start; /* Zone offset in md_dev (in sectors) */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index bff32285f8bb..253b09c86eca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -31,8 +31,11 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "dm-bio-list.h" #include +#include +#include +#include +#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 0a9ba7c3302e..1620eea3d57c 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -1,8 +1,6 @@ #ifndef _RAID1_H #define _RAID1_H -#include - typedef struct mirror_info mirror_info_t; struct mirror_info { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f03dd70d12a5..186e1b199d46 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -18,8 +18,11 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "dm-bio-list.h" #include +#include +#include +#include +#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index e9091cfeb286..244dbe507a54 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -1,8 +1,6 @@ #ifndef _RAID10_H #define _RAID10_H -#include - typedef struct mirror_info mirror_info_t; struct mirror_info { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f75698b1f63d..816157e7d8e0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -43,8 +43,12 @@ * miss any bits. */ +#include +#include #include #include +#include +#include "raid5.h" #include "raid6.h" #include "bitmap.h" @@ -1467,7 +1471,7 @@ static void copy_data(int frombio, struct bio *bio, static void compute_parity6(struct stripe_head *sh, int method) { - raid6_conf_t *conf = sh->raid_conf; + raid5_conf_t *conf = sh->raid_conf; int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count; struct bio *chosen; /**** FIX THIS: This could be very bad if disks is close to 256 ****/ @@ -2795,7 +2799,7 @@ static bool handle_stripe5(struct stripe_head *sh) static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) { - raid6_conf_t *conf = sh->raid_conf; + raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks; struct bio *return_bi = NULL; int i, pd_idx = sh->pd_idx; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 40f1d0335c74..0ed22dff56e0 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -1,7 +1,6 @@ #ifndef _RAID5_H #define _RAID5_H -#include #include /* diff --git a/drivers/md/raid6.h b/drivers/md/raid6.h index f6c13af65002..66e6b0c6734f 100644 --- a/drivers/md/raid6.h +++ b/drivers/md/raid6.h @@ -17,11 +17,7 @@ /* Set to 1 to use kernel-wide empty_zero_page */ #define RAID6_USE_EMPTY_ZERO_PAGE 0 - -#include -#include "raid5.h" - -typedef raid5_conf_t raid6_conf_t; /* Same configuration */ +#include /* Additional compute_parity mode -- updates the parity w/o LOCKING */ #define UPDATE_PARITY 4 diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 45e59d3c7f1f..141c03829153 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h deleted file mode 100644 index 71c4fd19c317..000000000000 --- a/include/linux/raid/md.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - md.h : Multiple Devices driver for Linux - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - Copyright (C) 1994-96 Marc ZYNGIER - or - - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_H -#define _MD_H - -#include -#include - -/* - * 'md_p.h' holds the 'physical' layout of RAID devices - * 'md_u.h' holds the user <=> kernel API - * - * 'md_k.h' holds kernel internal definitions - */ - -#include -#include -#include - -#ifdef CONFIG_MD - -#endif /* CONFIG_MD */ -#endif - diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 4c5e2d00ff5e..e78b3c1d55fd 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -15,9 +15,6 @@ #ifndef _MD_K_H #define _MD_K_H -/* and dm-bio-list.h is not under include/linux because.... ??? */ -#include "../../../drivers/md/dm-bio-list.h" - #ifdef CONFIG_BLOCK #define MaxSector (~(sector_t)0) diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 3e120587eada..5a210959e3f8 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -1,8 +1,6 @@ #ifndef _XOR_H #define _XOR_H -#include - #define MAX_XOR_BLOCKS 4 extern void xor_blocks(unsigned int count, unsigned int bytes, diff --git a/init/do_mounts.h b/init/do_mounts.h index 9aa968d54329..f5b978a9bb92 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 23a15fb57e15..69aebbf8fd2d 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -1,5 +1,6 @@ #include -#include +#include +#include #include "do_mounts.h" -- cgit v1.2.3-55-g7522 From 5ad4e53bd5406ee214ddc5a41f03f779b8b2d526 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:50:06 -0400 Subject: Get rid of indirect include of fs_struct.h Don't pull it in sched.h; very few files actually need it and those can include directly. sched.h itself only needs forward declaration of struct fs_struct; Signed-off-by: Al Viro --- arch/cris/kernel/process.c | 1 - fs/dcache.c | 1 + fs/exec.c | 1 + fs/fs_struct.c | 1 + fs/namei.c | 1 + fs/namespace.c | 1 + fs/open.c | 1 + fs/proc/base.c | 1 + fs/proc/task_nommu.c | 1 + include/linux/mnt_namespace.h | 2 ++ include/linux/nsproxy.h | 1 + include/linux/sched.h | 3 ++- init/do_mounts.c | 1 + kernel/auditsc.c | 1 + kernel/exec_domain.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sys.c | 1 + security/tomoyo/realpath.c | 1 + 19 files changed, 20 insertions(+), 2 deletions(-) (limited to 'init') diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 60816e876455..4df0b320d524 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b116..0dc4de21f088 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/fs/exec.c b/fs/exec.c index 614991bf0c87..052a961e41aa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 6ac219338670..eee059052db5 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. diff --git a/fs/namei.c b/fs/namei.c index 964c0249444b..b8433ebfae05 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) diff --git a/fs/namespace.c b/fs/namespace.c index 1e56303c718e..c6f54e4c4290 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "pnode.h" diff --git a/fs/open.c b/fs/open.c index 75b61677daaf..377eb25b6abf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -29,6 +29,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b688..f71559784bfb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 6ca01052c5bc..253afc04484c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d6..3a059298cc19 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b36..7b370c7cfeff 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2de..b4e065ea0de1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, diff --git a/init/do_mounts.c b/init/do_mounts.c index 8d4ff5afc1d8..dd7ee5f203f3 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8cbddff6c283..2bfc64786765 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index cb8e9626c215..c35452cadded 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -18,6 +18,7 @@ #include #include #include +#include static void default_handler(int, struct pt_regs *); diff --git a/kernel/exit.c b/kernel/exit.c index ad8375758a79..b5d656845c90 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 51f138a131de..e82a14577a98 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882a..ce182aaed204 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index d47f16b844b2..3bbe01a7a4b5 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "realpath.h" -- cgit v1.2.3-55-g7522 From acdd052a285a7b4cc7da4fa7d34ef9fd0a67b2f8 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Tue, 31 Mar 2009 15:23:50 -0700 Subject: init/main.c: fix sparse warnings: context imbalance Impact: Attribute function 'init_post' with __releases(...). Fix these sparse warnings: init/main.c:805:21: warning: context imbalance in 'init_post' - unexpected unlock init/main.c:899:9: warning: context imbalance in 'kernel_init' - wrong count at exit Signed-off-by: Hannes Eder Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index d6b388fbffa6..07c8658ffca5 100644 --- a/init/main.c +++ b/init/main.c @@ -793,6 +793,7 @@ static void run_init_process(char *init_filename) * makes it inline to init() and it becomes part of init.text section */ static noinline int init_post(void) + __releases(kernel_lock) { /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); -- cgit v1.2.3-55-g7522 From c1c490e017b66b31f6559db9cbb51a3ce00cf639 Mon Sep 17 00:00:00 2001 From: Simon Kitching Date: Thu, 2 Apr 2009 16:57:00 -0700 Subject: initramfs: prevent initramfs printk message being split by messages from other code. initramfs uses printk without a linefeed, then does some work, then uses printk to finish the message off. However if some other code does a printk in between, then the messages get mixed together. Better for each message to be an independent line... Example of problem that this fixes: checking if image is initramfs...<7>Switched to high resolution mode on CPU 1 Switched to high resolution mode on CPU 0 it is Signed-off-by: Simon Kitching Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'init') diff --git a/init/initramfs.c b/init/initramfs.c index 619c1baf7701..80cd713f6cc5 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -571,11 +571,11 @@ static int __init populate_rootfs(void) if (initrd_start) { #ifdef CONFIG_BLK_DEV_RAM int fd; - printk(KERN_INFO "checking if image is initramfs..."); + printk(KERN_INFO "checking if image is initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); if (!err) { - printk(" it is\n"); + printk(KERN_INFO "rootfs image is initramfs; unpacking...\n"); free_initrd(); return 0; } else { @@ -583,7 +583,8 @@ static int __init populate_rootfs(void) unpack_to_rootfs(__initramfs_start, __initramfs_end - __initramfs_start); } - printk("it isn't (%s); looks like an initrd\n", err); + printk(KERN_INFO "rootfs image is not initramfs (%s)" + "; looks like an initrd\n", err); fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700); if (fd >= 0) { sys_write(fd, (char *)initrd_start, -- cgit v1.2.3-55-g7522 From 627991a20b3f4d504d20466ab405fe035cb1a20a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 2 Apr 2009 16:57:47 -0700 Subject: memcg: remove redundant message at swapon It's pointed out that swap_cgroup's message at swapon() is nonsense. Because * It can be calculated very easily if all necessary information is written in Kconfig. * It's not necessary to annoying people at every swapon(). In other view, now, memory usage per swp_entry is reduced to 2bytes from 8bytes(64bit) and I think it's reasonably small. Reported-by: Hugh Dickins Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 2 ++ mm/page_cgroup.c | 7 ------- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 14c483d2b7c9..92d410603932 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -597,6 +597,8 @@ config CGROUP_MEM_RES_CTLR_SWAP is disabled by boot option, this will be automatically disabled and there will be no overhead from this. Even when you set this config=y, if boot option "noswapaccount" is set, swap will not be accounted. + Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page + size is 4096bytes, 512k per 1Gbytes of swap. endif # CGROUPS diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index ebf81074bed4..791905c991df 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -426,13 +426,6 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) } mutex_unlock(&swap_cgroup_mutex); - printk(KERN_INFO - "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" - " and %ld bytes to hold mem_cgroup information per swap ents\n", - array_size, length * PAGE_SIZE); - printk(KERN_INFO - "swap_cgroup can be disabled by noswapaccount boot option.\n"); - return 0; nomem: printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); -- cgit v1.2.3-55-g7522 From db7f47cf4805e30decb0841764b21b7c4000f7dc Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 2 Apr 2009 16:57:55 -0700 Subject: cpusets: allow cpusets to be configured/built on non-SMP systems Allow cpusets to be configured/built on non-SMP systems Currently it's impossible to build cpusets under UML on x86-64, since cpusets depends on SMP and x86-64 UML doesn't support SMP. There's code in cpusets that doesn't depend on SMP. This patch surrounds the minimum amount of cpusets code with #ifdef CONFIG_SMP in order to allow cpusets to build/run on UP systems (for testing purposes under UML). Reviewed-by: Li Zefan Signed-off-by: Paul Menage Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 2 +- kernel/cpuset.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 92d410603932..1398a14b0191 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -531,7 +531,7 @@ config CGROUP_DEVICE config CPUSETS bool "Cpuset support" - depends on SMP && CGROUPS + depends on CGROUPS help This option will let you create and manage CPUSETs which allow dynamically partitioning a system into sets of CPUs and diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3ff910eb30d3..2b93b50cbe4b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -517,6 +517,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) return 0; } +#ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping cpus_allowed masks? @@ -811,6 +812,18 @@ static void do_rebuild_sched_domains(struct work_struct *unused) put_online_cpus(); } +#else /* !CONFIG_SMP */ +static void do_rebuild_sched_domains(struct work_struct *unused) +{ +} + +static int generate_sched_domains(struct cpumask **domains, + struct sched_domain_attr **attributes) +{ + *domains = NULL; + return 1; +} +#endif /* CONFIG_SMP */ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains); @@ -1164,8 +1177,10 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { +#ifdef CONFIG_SMP if (val < -1 || val >= SD_LV_MAX) return -EINVAL; +#endif if (val != cs->relax_domain_level) { cs->relax_domain_level = val; -- cgit v1.2.3-55-g7522 From 07fe7cb7c7c179f473fd9c823348fd3eb5dad369 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:35 +0100 Subject: Create a dynamically sized pool of threads for doing very slow work items Create a dynamically sized pool of threads for doing very slow work items, such as invoking mkdir() or rmdir() - things that may take a long time and may sleep, holding mutexes/semaphores and hogging a thread, and are thus unsuitable for workqueues. The number of threads is always at least a settable minimum, but more are started when there's more work to do, up to a limit. Because of the nature of the load, it's not suitable for a 1-thread-per-CPU type pool. A system with one CPU may well want several threads. This is used by FS-Cache to do slow caching operations in the background, such as looking up, creating or deleting cache objects. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- include/linux/slow-work.h | 88 +++++++++++ init/Kconfig | 12 ++ kernel/Makefile | 1 + kernel/slow-work.c | 388 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 489 insertions(+) create mode 100644 include/linux/slow-work.h create mode 100644 kernel/slow-work.c (limited to 'init') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h new file mode 100644 index 000000000000..4dd754af393e --- /dev/null +++ b/include/linux/slow-work.h @@ -0,0 +1,88 @@ +/* Worker thread pool for slow items, such as filesystem lookups or mkdirs + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_SLOW_WORK_H +#define _LINUX_SLOW_WORK_H + +#ifdef CONFIG_SLOW_WORK + +struct slow_work; + +/* + * The operations used to support slow work items + */ +struct slow_work_ops { + /* get a ref on a work item + * - return 0 if successful, -ve if not + */ + int (*get_ref)(struct slow_work *work); + + /* discard a ref to a work item */ + void (*put_ref)(struct slow_work *work); + + /* execute a work item */ + void (*execute)(struct slow_work *work); +}; + +/* + * A slow work item + * - A reference is held on the parent object by the thread pool when it is + * queued + */ +struct slow_work { + unsigned long flags; +#define SLOW_WORK_PENDING 0 /* item pending (further) execution */ +#define SLOW_WORK_EXECUTING 1 /* item currently executing */ +#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ +#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ + const struct slow_work_ops *ops; /* operations table for this item */ + struct list_head link; /* link in queue */ +}; + +/** + * slow_work_init - Initialise a slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a slow work item. + */ +static inline void slow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 0; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +/** + * slow_work_init - Initialise a very slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a very slow work item. This item will be restricted such that + * only a certain number of the pool threads will be able to execute items of + * this type. + */ +static inline void vslow_work_init(struct slow_work *work, + const struct slow_work_ops *ops) +{ + work->flags = 1 << SLOW_WORK_VERY_SLOW; + work->ops = ops; + INIT_LIST_HEAD(&work->link); +} + +extern int slow_work_enqueue(struct slow_work *work); +extern int slow_work_register_user(void); +extern void slow_work_unregister_user(void); + + +#endif /* CONFIG_SLOW_WORK */ +#endif /* _LINUX_SLOW_WORK_H */ diff --git a/init/Kconfig b/init/Kconfig index 1398a14b0191..236a79377b8e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1014,6 +1014,18 @@ config MARKERS source "arch/Kconfig" +config SLOW_WORK + default n + bool "Enable slow work thread pool" + help + The slow work thread pool provides a number of dynamically allocated + threads that can be used by the kernel to perform operations that + take a relatively long time. + + An example of this would be CacheFiles doing a path lookup followed + by a series of mkdirs and a create call, all of which have to touch + disk. + endmenu # General setup config HAVE_GENERIC_DMA_COHERENT diff --git a/kernel/Makefile b/kernel/Makefile index e4791b3ba55d..bab1dffe37e9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o +obj-$(CONFIG_SLOW_WORK) += slow-work.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/slow-work.c b/kernel/slow-work.c new file mode 100644 index 000000000000..5a7392734c82 --- /dev/null +++ b/kernel/slow-work.c @@ -0,0 +1,388 @@ +/* Worker thread pool for slow items, such as filesystem lookups or mkdirs + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +/* + * The pool of threads has at least min threads in it as long as someone is + * using the facility, and may have as many as max. + * + * A portion of the pool may be processing very slow operations. + */ +static unsigned slow_work_min_threads = 2; +static unsigned slow_work_max_threads = 4; +static unsigned vslow_work_proportion = 50; /* % of threads that may process + * very slow work */ +static atomic_t slow_work_thread_count; +static atomic_t vslow_work_executing_count; + +/* + * The queues of work items and the lock governing access to them. These are + * shared between all the CPUs. It doesn't make sense to have per-CPU queues + * as the number of threads bears no relation to the number of CPUs. + * + * There are two queues of work items: one for slow work items, and one for + * very slow work items. + */ +static LIST_HEAD(slow_work_queue); +static LIST_HEAD(vslow_work_queue); +static DEFINE_SPINLOCK(slow_work_queue_lock); + +/* + * The thread controls. A variable used to signal to the threads that they + * should exit when the queue is empty, a waitqueue used by the threads to wait + * for signals, and a completion set by the last thread to exit. + */ +static bool slow_work_threads_should_exit; +static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq); +static DECLARE_COMPLETION(slow_work_last_thread_exited); + +/* + * The number of users of the thread pool and its lock. Whilst this is zero we + * have no threads hanging around, and when this reaches zero, we wait for all + * active or queued work items to complete and kill all the threads we do have. + */ +static int slow_work_user_count; +static DEFINE_MUTEX(slow_work_user_lock); + +/* + * Calculate the maximum number of active threads in the pool that are + * permitted to process very slow work items. + * + * The answer is rounded up to at least 1, but may not equal or exceed the + * maximum number of the threads in the pool. This means we always have at + * least one thread that can process slow work items, and we always have at + * least one thread that won't get tied up doing so. + */ +static unsigned slow_work_calc_vsmax(void) +{ + unsigned vsmax; + + vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion; + vsmax /= 100; + vsmax = max(vsmax, 1U); + return min(vsmax, slow_work_max_threads - 1); +} + +/* + * Attempt to execute stuff queued on a slow thread. Return true if we managed + * it, false if there was nothing to do. + */ +static bool slow_work_execute(void) +{ + struct slow_work *work = NULL; + unsigned vsmax; + bool very_slow; + + vsmax = slow_work_calc_vsmax(); + + /* find something to execute */ + spin_lock_irq(&slow_work_queue_lock); + if (!list_empty(&vslow_work_queue) && + atomic_read(&vslow_work_executing_count) < vsmax) { + work = list_entry(vslow_work_queue.next, + struct slow_work, link); + if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) + BUG(); + list_del_init(&work->link); + atomic_inc(&vslow_work_executing_count); + very_slow = true; + } else if (!list_empty(&slow_work_queue)) { + work = list_entry(slow_work_queue.next, + struct slow_work, link); + if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) + BUG(); + list_del_init(&work->link); + very_slow = false; + } else { + very_slow = false; /* avoid the compiler warning */ + } + spin_unlock_irq(&slow_work_queue_lock); + + if (!work) + return false; + + if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) + BUG(); + + work->ops->execute(work); + + if (very_slow) + atomic_dec(&vslow_work_executing_count); + clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); + + /* if someone tried to enqueue the item whilst we were executing it, + * then it'll be left unenqueued to avoid multiple threads trying to + * execute it simultaneously + * + * there is, however, a race between us testing the pending flag and + * getting the spinlock, and between the enqueuer setting the pending + * flag and getting the spinlock, so we use a deferral bit to tell us + * if the enqueuer got there first + */ + if (test_bit(SLOW_WORK_PENDING, &work->flags)) { + spin_lock_irq(&slow_work_queue_lock); + + if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) && + test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) + goto auto_requeue; + + spin_unlock_irq(&slow_work_queue_lock); + } + + work->ops->put_ref(work); + return true; + +auto_requeue: + /* we must complete the enqueue operation + * - we transfer our ref on the item back to the appropriate queue + * - don't wake another thread up as we're awake already + */ + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) + list_add_tail(&work->link, &vslow_work_queue); + else + list_add_tail(&work->link, &slow_work_queue); + spin_unlock_irq(&slow_work_queue_lock); + return true; +} + +/** + * slow_work_enqueue - Schedule a slow work item for processing + * @work: The work item to queue + * + * Schedule a slow work item for processing. If the item is already undergoing + * execution, this guarantees not to re-enter the execution routine until the + * first execution finishes. + * + * The item is pinned by this function as it retains a reference to it, managed + * through the item operations. The item is unpinned once it has been + * executed. + * + * An item may hog the thread that is running it for a relatively large amount + * of time, sufficient, for example, to perform several lookup, mkdir, create + * and setxattr operations. It may sleep on I/O and may sleep to obtain locks. + * + * Conversely, if a number of items are awaiting processing, it may take some + * time before any given item is given attention. The number of threads in the + * pool may be increased to deal with demand, but only up to a limit. + * + * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in + * the very slow queue, from which only a portion of the threads will be + * allowed to pick items to execute. This ensures that very slow items won't + * overly block ones that are just ordinarily slow. + * + * Returns 0 if successful, -EAGAIN if not. + */ +int slow_work_enqueue(struct slow_work *work) +{ + unsigned long flags; + + BUG_ON(slow_work_user_count <= 0); + BUG_ON(!work); + BUG_ON(!work->ops); + BUG_ON(!work->ops->get_ref); + + /* when honouring an enqueue request, we only promise that we will run + * the work function in the future; we do not promise to run it once + * per enqueue request + * + * we use the PENDING bit to merge together repeat requests without + * having to disable IRQs and take the spinlock, whilst still + * maintaining our promise + */ + if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { + spin_lock_irqsave(&slow_work_queue_lock, flags); + + /* we promise that we will not attempt to execute the work + * function in more than one thread simultaneously + * + * this, however, leaves us with a problem if we're asked to + * enqueue the work whilst someone is executing the work + * function as simply queueing the work immediately means that + * another thread may try executing it whilst it is already + * under execution + * + * to deal with this, we set the ENQ_DEFERRED bit instead of + * enqueueing, and the thread currently executing the work + * function will enqueue the work item when the work function + * returns and it has cleared the EXECUTING bit + */ + if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { + set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); + } else { + if (work->ops->get_ref(work) < 0) + goto cant_get_ref; + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) + list_add_tail(&work->link, &vslow_work_queue); + else + list_add_tail(&work->link, &slow_work_queue); + wake_up(&slow_work_thread_wq); + } + + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + } + return 0; + +cant_get_ref: + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + return -EAGAIN; +} +EXPORT_SYMBOL(slow_work_enqueue); + +/* + * Determine if there is slow work available for dispatch + */ +static inline bool slow_work_available(int vsmax) +{ + return !list_empty(&slow_work_queue) || + (!list_empty(&vslow_work_queue) && + atomic_read(&vslow_work_executing_count) < vsmax); +} + +/* + * Worker thread dispatcher + */ +static int slow_work_thread(void *_data) +{ + int vsmax; + + DEFINE_WAIT(wait); + + set_freezable(); + set_user_nice(current, -5); + + for (;;) { + vsmax = vslow_work_proportion; + vsmax *= atomic_read(&slow_work_thread_count); + vsmax /= 100; + + prepare_to_wait(&slow_work_thread_wq, &wait, + TASK_INTERRUPTIBLE); + if (!freezing(current) && + !slow_work_threads_should_exit && + !slow_work_available(vsmax)) + schedule(); + finish_wait(&slow_work_thread_wq, &wait); + + try_to_freeze(); + + vsmax = vslow_work_proportion; + vsmax *= atomic_read(&slow_work_thread_count); + vsmax /= 100; + + if (slow_work_available(vsmax) && slow_work_execute()) { + cond_resched(); + continue; + } + + if (slow_work_threads_should_exit) + break; + } + + if (atomic_dec_and_test(&slow_work_thread_count)) + complete_and_exit(&slow_work_last_thread_exited, 0); + return 0; +} + +/** + * slow_work_register_user - Register a user of the facility + * + * Register a user of the facility, starting up the initial threads if there + * aren't any other users at this point. This will return 0 if successful, or + * an error if not. + */ +int slow_work_register_user(void) +{ + struct task_struct *p; + int loop; + + mutex_lock(&slow_work_user_lock); + + if (slow_work_user_count == 0) { + printk(KERN_NOTICE "Slow work thread pool: Starting up\n"); + init_completion(&slow_work_last_thread_exited); + + slow_work_threads_should_exit = false; + + /* start the minimum number of threads */ + for (loop = 0; loop < slow_work_min_threads; loop++) { + atomic_inc(&slow_work_thread_count); + p = kthread_run(slow_work_thread, NULL, "kslowd"); + if (IS_ERR(p)) + goto error; + } + printk(KERN_NOTICE "Slow work thread pool: Ready\n"); + } + + slow_work_user_count++; + mutex_unlock(&slow_work_user_lock); + return 0; + +error: + if (atomic_dec_and_test(&slow_work_thread_count)) + complete(&slow_work_last_thread_exited); + if (loop > 0) { + printk(KERN_ERR "Slow work thread pool:" + " Aborting startup on ENOMEM\n"); + slow_work_threads_should_exit = true; + wake_up_all(&slow_work_thread_wq); + wait_for_completion(&slow_work_last_thread_exited); + printk(KERN_ERR "Slow work thread pool: Aborted\n"); + } + mutex_unlock(&slow_work_user_lock); + return PTR_ERR(p); +} +EXPORT_SYMBOL(slow_work_register_user); + +/** + * slow_work_unregister_user - Unregister a user of the facility + * + * Unregister a user of the facility, killing all the threads if this was the + * last one. + */ +void slow_work_unregister_user(void) +{ + mutex_lock(&slow_work_user_lock); + + BUG_ON(slow_work_user_count <= 0); + + slow_work_user_count--; + if (slow_work_user_count == 0) { + printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); + slow_work_threads_should_exit = true; + wake_up_all(&slow_work_thread_wq); + wait_for_completion(&slow_work_last_thread_exited); + printk(KERN_NOTICE "Slow work thread pool:" + " Shut down complete\n"); + } + + mutex_unlock(&slow_work_user_lock); +} +EXPORT_SYMBOL(slow_work_unregister_user); + +/* + * Initialise the slow work facility + */ +static int __init init_slow_work(void) +{ + unsigned nr_cpus = num_possible_cpus(); + + if (nr_cpus > slow_work_max_threads) + slow_work_max_threads = nr_cpus; + return 0; +} + +subsys_initcall(init_slow_work); -- cgit v1.2.3-55-g7522 From 1c2d008c9e73626cc354751c62b94177c4094f8b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Apr 2009 15:47:25 +0100 Subject: Make CONFIG_SLOW_WORK an automatic rather than manual config option Make CONFIG_SLOW_WORK an automatic rather than manual config option so that people configuring their kernels don't have to make the choice. It can be selected automatically by those things that require it (such as FS-Cache). Signed-off-by: David Howells Acked-by: Jeff Garzik Acked-by: Kyle McMartin Signed-off-by: Linus Torvalds --- init/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 09c79537ae09..c52d1d48272a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1016,7 +1016,7 @@ source "arch/Kconfig" config SLOW_WORK default n - bool "Enable slow work thread pool" + bool help The slow work thread pool provides a number of dynamically allocated threads that can be used by the kernel to perform operations that @@ -1026,6 +1026,8 @@ config SLOW_WORK by a series of mkdirs and a create call, all of which have to touch disk. + See Documentation/slow-work.txt. + endmenu # General setup config HAVE_GENERIC_DMA_COHERENT -- cgit v1.2.3-55-g7522 From 614b84cf4e4a920d2af32b8f147ea1e3b8c27ea6 Mon Sep 17 00:00:00 2001 From: Serge E. Hallyn Date: Mon, 6 Apr 2009 19:01:08 -0700 Subject: namespaces: mqueue ns: move mqueue_mnt into struct ipc_namespace Move mqueue vfsmount plus a few tunables into the ipc_namespace struct. The CONFIG_IPC_NS boolean and the ipc_namespace struct will serve both the posix message queue namespaces and the SYSV ipc namespaces. The sysctl code will be fixed separately in patch 3. After just this patch, making a change to posix mqueue tunables always changes the values in the initial ipc namespace. Signed-off-by: Cedric Le Goater Signed-off-by: Serge E. Hallyn Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 39 +++++++++++-- init/Kconfig | 4 +- ipc/mqueue.c | 124 +++++++++++++++++++++++------------------- ipc/msgutil.c | 22 ++++++++ ipc/namespace.c | 2 + ipc/util.c | 9 --- ipc/util.h | 16 ++++++ 7 files changed, 145 insertions(+), 71 deletions(-) (limited to 'init') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index ea330f9e7100..3e6fcacebe8a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -44,24 +44,55 @@ struct ipc_namespace { int shm_tot; struct notifier_block ipcns_nb; + + /* The kern_mount of the mqueuefs sb. We take a ref on it */ + struct vfsmount *mq_mnt; + + /* # queues in this ns, protected by mq_lock */ + unsigned int mq_queues_count; + + /* next fields are set through sysctl */ + unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ + unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ + unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ + }; extern struct ipc_namespace init_ipc_ns; extern atomic_t nr_ipc_ns; -#ifdef CONFIG_SYSVIPC +#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, +#else +#define INIT_IPC_NS(ns) +#endif +#ifdef CONFIG_SYSVIPC extern int register_ipcns_notifier(struct ipc_namespace *); extern int cond_register_ipcns_notifier(struct ipc_namespace *); extern void unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); - #else /* CONFIG_SYSVIPC */ -#define INIT_IPC_NS(ns) +static inline int register_ipcns_notifier(struct ipc_namespace *ns) +{ return 0; } +static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns) +{ return 0; } +static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { } +static inline int ipcns_notify(unsigned long l) { return 0; } #endif /* CONFIG_SYSVIPC */ -#if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS) +#ifdef CONFIG_POSIX_MQUEUE +extern void mq_init_ns(struct ipc_namespace *ns); +/* default values */ +#define DFLT_QUEUESMAX 256 /* max number of message queues */ +#define DFLT_MSGMAX 10 /* max number of messages in each queue */ +#define HARD_MSGMAX (131072/sizeof(void *)) +#define DFLT_MSGSIZEMAX 8192 /* max message size */ +#else +#define mq_init_ns(ns) ((void) 0) +#endif + +#if defined(CONFIG_IPC_NS) extern void free_ipc_ns(struct kref *kref); extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns); diff --git a/init/Kconfig b/init/Kconfig index c52d1d48272a..a0807ba91644 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -670,10 +670,10 @@ config UTS_NS config IPC_NS bool "IPC namespace" - depends on NAMESPACES && SYSVIPC + depends on NAMESPACES && (SYSVIPC || POSIX_MQUEUE) help In this namespace tasks work with IPC ids which correspond to - different IPC objects in different namespaces + different IPC objects in different namespaces. config USER_NS bool "User namespace (EXPERIMENTAL)" diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 916785363f0f..a3673a09069a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "util.h" @@ -46,12 +47,6 @@ #define STATE_PENDING 1 #define STATE_READY 2 -/* default values */ -#define DFLT_QUEUESMAX 256 /* max number of message queues */ -#define DFLT_MSGMAX 10 /* max number of messages in each queue */ -#define HARD_MSGMAX (131072/sizeof(void*)) -#define DFLT_MSGSIZEMAX 8192 /* max message size */ - /* * Define the ranges various user-specified maximum values can * be set to. @@ -95,12 +90,6 @@ static void remove_notification(struct mqueue_inode_info *info); static spinlock_t mq_lock; static struct kmem_cache *mqueue_inode_cachep; -static struct vfsmount *mqueue_mnt; - -static unsigned int queues_count; -static unsigned int queues_max = DFLT_QUEUESMAX; -static unsigned int msg_max = DFLT_MSGMAX; -static unsigned int msgsize_max = DFLT_MSGSIZEMAX; static struct ctl_table_header * mq_sysctl_table; @@ -109,11 +98,27 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) return container_of(inode, struct mqueue_inode_info, vfs_inode); } +void mq_init_ns(struct ipc_namespace *ns) +{ + ns->mq_queues_count = 0; + ns->mq_queues_max = DFLT_QUEUESMAX; + ns->mq_msg_max = DFLT_MSGMAX; + ns->mq_msgsize_max = DFLT_MSGSIZEMAX; + ns->mq_mnt = mntget(init_ipc_ns.mq_mnt); +} + +void mq_exit_ns(struct ipc_namespace *ns) +{ + /* will need to clear out ns->mq_mnt->mnt_sb->s_fs_info here */ + mntput(ns->mq_mnt); +} + static struct inode *mqueue_get_inode(struct super_block *sb, int mode, struct mq_attr *attr) { struct user_struct *u = current_user(); struct inode *inode; + struct ipc_namespace *ipc_ns = &init_ipc_ns; inode = new_inode(sb); if (inode) { @@ -141,8 +146,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, info->qsize = 0; info->user = NULL; /* set when all is ok */ memset(&info->attr, 0, sizeof(info->attr)); - info->attr.mq_maxmsg = msg_max; - info->attr.mq_msgsize = msgsize_max; + info->attr.mq_maxmsg = ipc_ns->mq_msg_max; + info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; if (attr) { info->attr.mq_maxmsg = attr->mq_maxmsg; info->attr.mq_msgsize = attr->mq_msgsize; @@ -242,6 +247,7 @@ static void mqueue_delete_inode(struct inode *inode) struct user_struct *user; unsigned long mq_bytes; int i; + struct ipc_namespace *ipc_ns = &init_ipc_ns; if (S_ISDIR(inode->i_mode)) { clear_inode(inode); @@ -262,7 +268,7 @@ static void mqueue_delete_inode(struct inode *inode) if (user) { spin_lock(&mq_lock); user->mq_bytes -= mq_bytes; - queues_count--; + ipc_ns->mq_queues_count--; spin_unlock(&mq_lock); free_uid(user); } @@ -274,21 +280,23 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, struct inode *inode; struct mq_attr *attr = dentry->d_fsdata; int error; + struct ipc_namespace *ipc_ns = &init_ipc_ns; spin_lock(&mq_lock); - if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) { + if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && + !capable(CAP_SYS_RESOURCE)) { error = -ENOSPC; - goto out_lock; + goto out_unlock; } - queues_count++; + ipc_ns->mq_queues_count++; spin_unlock(&mq_lock); inode = mqueue_get_inode(dir->i_sb, mode, attr); if (!inode) { error = -ENOMEM; spin_lock(&mq_lock); - queues_count--; - goto out_lock; + ipc_ns->mq_queues_count--; + goto out_unlock; } dir->i_size += DIRENT_SIZE; @@ -297,7 +305,7 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); dget(dentry); return 0; -out_lock: +out_unlock: spin_unlock(&mq_lock); return error; } @@ -562,7 +570,7 @@ static void remove_notification(struct mqueue_inode_info *info) info->notify_owner = NULL; } -static int mq_attr_ok(struct mq_attr *attr) +static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) { if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) return 0; @@ -570,8 +578,8 @@ static int mq_attr_ok(struct mq_attr *attr) if (attr->mq_maxmsg > HARD_MSGMAX) return 0; } else { - if (attr->mq_maxmsg > msg_max || - attr->mq_msgsize > msgsize_max) + if (attr->mq_maxmsg > ipc_ns->mq_msg_max || + attr->mq_msgsize > ipc_ns->mq_msgsize_max) return 0; } /* check for overflow */ @@ -587,8 +595,9 @@ static int mq_attr_ok(struct mq_attr *attr) /* * Invoked when creating a new queue via sys_mq_open */ -static struct file *do_create(struct dentry *dir, struct dentry *dentry, - int oflag, mode_t mode, struct mq_attr *attr) +static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, + struct dentry *dentry, int oflag, mode_t mode, + struct mq_attr *attr) { const struct cred *cred = current_cred(); struct file *result; @@ -596,14 +605,14 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry, if (attr) { ret = -EINVAL; - if (!mq_attr_ok(attr)) + if (!mq_attr_ok(ipc_ns, attr)) goto out; /* store for use during create */ dentry->d_fsdata = attr; } mode &= ~current_umask(); - ret = mnt_want_write(mqueue_mnt); + ret = mnt_want_write(ipc_ns->mq_mnt); if (ret) goto out; ret = vfs_create(dir->d_inode, dentry, mode, NULL); @@ -611,24 +620,25 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry, if (ret) goto out_drop_write; - result = dentry_open(dentry, mqueue_mnt, oflag, cred); + result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); /* * dentry_open() took a persistent mnt_want_write(), * so we can now drop this one. */ - mnt_drop_write(mqueue_mnt); + mnt_drop_write(ipc_ns->mq_mnt); return result; out_drop_write: - mnt_drop_write(mqueue_mnt); + mnt_drop_write(ipc_ns->mq_mnt); out: dput(dentry); - mntput(mqueue_mnt); + mntput(ipc_ns->mq_mnt); return ERR_PTR(ret); } /* Opens existing queue */ -static struct file *do_open(struct dentry *dentry, int oflag) +static struct file *do_open(struct ipc_namespace *ipc_ns, + struct dentry *dentry, int oflag) { const struct cred *cred = current_cred(); @@ -637,17 +647,17 @@ static struct file *do_open(struct dentry *dentry, int oflag) if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { dput(dentry); - mntput(mqueue_mnt); + mntput(ipc_ns->mq_mnt); return ERR_PTR(-EINVAL); } if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { dput(dentry); - mntput(mqueue_mnt); + mntput(ipc_ns->mq_mnt); return ERR_PTR(-EACCES); } - return dentry_open(dentry, mqueue_mnt, oflag, cred); + return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); } SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, @@ -658,6 +668,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, char *name; struct mq_attr attr; int fd, error; + struct ipc_namespace *ipc_ns = &init_ipc_ns; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; @@ -671,13 +682,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, if (fd < 0) goto out_putname; - mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex); - dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); + mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); + dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); goto out_err; } - mntget(mqueue_mnt); + mntget(ipc_ns->mq_mnt); if (oflag & O_CREAT) { if (dentry->d_inode) { /* entry already exists */ @@ -685,10 +696,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, error = -EEXIST; if (oflag & O_EXCL) goto out; - filp = do_open(dentry, oflag); + filp = do_open(ipc_ns, dentry, oflag); } else { - filp = do_create(mqueue_mnt->mnt_root, dentry, - oflag, mode, + filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root, + dentry, oflag, mode, u_attr ? &attr : NULL); } } else { @@ -696,7 +707,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, if (!dentry->d_inode) goto out; audit_inode(name, dentry); - filp = do_open(dentry, oflag); + filp = do_open(ipc_ns, dentry, oflag); } if (IS_ERR(filp)) { @@ -709,13 +720,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, out: dput(dentry); - mntput(mqueue_mnt); + mntput(ipc_ns->mq_mnt); out_putfd: put_unused_fd(fd); out_err: fd = error; out_upsem: - mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); + mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); out_putname: putname(name); return fd; @@ -727,14 +738,15 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) char *name; struct dentry *dentry; struct inode *inode = NULL; + struct ipc_namespace *ipc_ns = &init_ipc_ns; name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); - mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex, + mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); + dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock; @@ -748,16 +760,16 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - err = mnt_want_write(mqueue_mnt); + err = mnt_want_write(ipc_ns->mq_mnt); if (err) goto out_err; err = vfs_unlink(dentry->d_parent->d_inode, dentry); - mnt_drop_write(mqueue_mnt); + mnt_drop_write(ipc_ns->mq_mnt); out_err: dput(dentry); out_unlock: - mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); + mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); putname(name); if (inode) iput(inode); @@ -1214,14 +1226,14 @@ static int msg_maxsize_limit_max = MAX_MSGSIZEMAX; static ctl_table mq_sysctls[] = { { .procname = "queues_max", - .data = &queues_max, + .data = &init_ipc_ns.mq_queues_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .procname = "msg_max", - .data = &msg_max, + .data = &init_ipc_ns.mq_msg_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -1230,7 +1242,7 @@ static ctl_table mq_sysctls[] = { }, { .procname = "msgsize_max", - .data = &msgsize_max, + .data = &init_ipc_ns.mq_msgsize_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -1276,13 +1288,13 @@ static int __init init_mqueue_fs(void) if (error) goto out_sysctl; - if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) { - error = PTR_ERR(mqueue_mnt); + init_ipc_ns.mq_mnt = kern_mount(&mqueue_fs_type); + if (IS_ERR(init_ipc_ns.mq_mnt)) { + error = PTR_ERR(init_ipc_ns.mq_mnt); goto out_filesystem; } /* internal initialization - not common for vfs */ - queues_count = 0; spin_lock_init(&mq_lock); return 0; diff --git a/ipc/msgutil.c b/ipc/msgutil.c index c82c215693d7..73c316cb8613 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -13,10 +13,32 @@ #include #include #include +#include #include #include "util.h" +/* + * The next 2 defines are here bc this is the only file + * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE + * and not CONFIG_IPC_NS. + */ +struct ipc_namespace init_ipc_ns = { + .kref = { + /* It's not for this patch to change, but should this be 1? */ + .refcount = ATOMIC_INIT(2), + }, +#ifdef CONFIG_POSIX_MQUEUE + .mq_mnt = NULL, + .mq_queues_count = 0, + .mq_queues_max = DFLT_QUEUESMAX, + .mq_msg_max = DFLT_MSGMAX, + .mq_msgsize_max = DFLT_MSGSIZEMAX, +#endif +}; + +atomic_t nr_ipc_ns = ATOMIC_INIT(1); + struct msg_msgseg { struct msg_msgseg* next; /* the next part of the message follows immediately */ diff --git a/ipc/namespace.c b/ipc/namespace.c index 9171d948751e..4b4dc6d847f1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -25,6 +25,7 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); + mq_init_ns(ns); /* * msgmni has already been computed for the new ipc ns. @@ -101,6 +102,7 @@ void free_ipc_ns(struct kref *kref) sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); + mq_exit_ns(ns); kfree(ns); atomic_dec(&nr_ipc_ns); diff --git a/ipc/util.c b/ipc/util.c index 7585a72e259b..b8e4ba92f6d1 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -47,15 +47,6 @@ struct ipc_proc_iface { int (*show)(struct seq_file *, void *); }; -struct ipc_namespace init_ipc_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, -}; - -atomic_t nr_ipc_ns = ATOMIC_INIT(1); - - #ifdef CONFIG_MEMORY_HOTPLUG static void ipc_memory_notifier(struct work_struct *work) diff --git a/ipc/util.h b/ipc/util.h index 3646b45a03c9..0e7d9223acc1 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -20,6 +20,13 @@ void shm_init (void); struct ipc_namespace; +#ifdef CONFIG_POSIX_MQUEUE +void mq_exit_ns(struct ipc_namespace *ns); +#else +static inline void mq_exit_ns(struct ipc_namespace *ns) { } +#endif + +#ifdef CONFIG_SYSVIPC void sem_init_ns(struct ipc_namespace *ns); void msg_init_ns(struct ipc_namespace *ns); void shm_init_ns(struct ipc_namespace *ns); @@ -27,6 +34,15 @@ void shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); +#else +static inline void sem_init_ns(struct ipc_namespace *ns) { } +static inline void msg_init_ns(struct ipc_namespace *ns) { } +static inline void shm_init_ns(struct ipc_namespace *ns) { } + +static inline void sem_exit_ns(struct ipc_namespace *ns) { } +static inline void msg_exit_ns(struct ipc_namespace *ns) { } +static inline void shm_exit_ns(struct ipc_namespace *ns) { } +#endif /* * Structure that holds the parameters needed by the ipc operations -- cgit v1.2.3-55-g7522 From bdc8e5f85f9abe2e7c78dcf39d81f9a97178788b Mon Sep 17 00:00:00 2001 From: Serge E. Hallyn Date: Mon, 6 Apr 2009 19:01:11 -0700 Subject: namespaces: mqueue namespace: adapt sysctl Largely inspired from ipc/ipc_sysctl.c. This patch isolates the mqueue sysctl stuff in its own file. [akpm@linux-foundation.org: build fix] Signed-off-by: Cedric Le Goater Signed-off-by: Nadia Derbey Signed-off-by: Serge E. Hallyn Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 14 +++++ init/Kconfig | 6 +++ ipc/Makefile | 1 + ipc/mq_sysctl.c | 116 ++++++++++++++++++++++++++++++++++++++++++ ipc/mqueue.c | 65 +---------------------- 5 files changed, 138 insertions(+), 64 deletions(-) create mode 100644 ipc/mq_sysctl.c (limited to 'init') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 3392d50de351..3bf40e246a80 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -128,4 +128,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) { } #endif + +#ifdef CONFIG_POSIX_MQUEUE_SYSCTL + +struct ctl_table_header; +extern struct ctl_table_header *mq_register_sysctl_table(void); + +#else /* CONFIG_POSIX_MQUEUE_SYSCTL */ + +static inline struct ctl_table_header *mq_register_sysctl_table(void) +{ + return NULL; +} + +#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ #endif diff --git a/init/Kconfig b/init/Kconfig index a0807ba91644..f2f9b5362b48 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -208,6 +208,12 @@ config POSIX_MQUEUE If unsure, say Y. +config POSIX_MQUEUE_SYSCTL + bool + depends on POSIX_MQUEUE + depends on SYSCTL + default y + config BSD_PROCESS_ACCT bool "BSD Process Accounting" help diff --git a/ipc/Makefile b/ipc/Makefile index 65c384395801..4e1955ea815d 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) obj-$(CONFIG_IPC_NS) += namespace.o +obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c new file mode 100644 index 000000000000..89f60ec8ee54 --- /dev/null +++ b/ipc/mq_sysctl.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2007 IBM Corporation + * + * Author: Cedric Le Goater + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include +#include +#include + +/* + * Define the ranges various user-specified maximum values can + * be set to. + */ +#define MIN_MSGMAX 1 /* min value for msg_max */ +#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */ +#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ +#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ + +static void *get_mq(ctl_table *table) +{ + char *which = table->data; + struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; + return which; +} + +#ifdef CONFIG_PROC_SYSCTL +static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table mq_table; + memcpy(&mq_table, table, sizeof(mq_table)); + mq_table.data = get_mq(table); + + return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); +} + +static int proc_mq_dointvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table mq_table; + memcpy(&mq_table, table, sizeof(mq_table)); + mq_table.data = get_mq(table); + + return proc_dointvec_minmax(&mq_table, write, filp, buffer, + lenp, ppos); +} +#else +#define proc_mq_dointvec NULL +#define proc_mq_dointvec_minmax NULL +#endif + +static int msg_max_limit_min = MIN_MSGMAX; +static int msg_max_limit_max = MAX_MSGMAX; + +static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; +static int msg_maxsize_limit_max = MAX_MSGSIZEMAX; + +static ctl_table mq_sysctls[] = { + { + .procname = "queues_max", + .data = &init_ipc_ns.mq_queues_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_mq_dointvec, + }, + { + .procname = "msg_max", + .data = &init_ipc_ns.mq_msg_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_mq_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, + { + .procname = "msgsize_max", + .data = &init_ipc_ns.mq_msgsize_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_mq_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, + { .ctl_name = 0 } +}; + +static ctl_table mq_sysctl_dir[] = { + { + .procname = "mqueue", + .mode = 0555, + .child = mq_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table mq_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = mq_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +struct ctl_table_header *mq_register_sysctl_table(void) +{ + return register_sysctl_table(mq_sysctl_root); +} diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c82d7b51ef68..e35ba2c3a8d7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -47,15 +47,6 @@ #define STATE_PENDING 1 #define STATE_READY 2 -/* - * Define the ranges various user-specified maximum values can - * be set to. - */ -#define MIN_MSGMAX 1 /* min value for msg_max */ -#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */ -#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ -#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ - struct ext_wait_queue { /* queue of sleeping tasks */ struct task_struct *task; struct list_head list; @@ -1271,60 +1262,6 @@ void mq_put_mnt(struct ipc_namespace *ns) mntput(ns->mq_mnt); } -static int msg_max_limit_min = MIN_MSGMAX; -static int msg_max_limit_max = MAX_MSGMAX; - -static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; -static int msg_maxsize_limit_max = MAX_MSGSIZEMAX; - -static ctl_table mq_sysctls[] = { - { - .procname = "queues_max", - .data = &init_ipc_ns.mq_queues_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .procname = "msg_max", - .data = &init_ipc_ns.mq_msg_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &msg_max_limit_min, - .extra2 = &msg_max_limit_max, - }, - { - .procname = "msgsize_max", - .data = &init_ipc_ns.mq_msgsize_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &msg_maxsize_limit_min, - .extra2 = &msg_maxsize_limit_max, - }, - { .ctl_name = 0 } -}; - -static ctl_table mq_sysctl_dir[] = { - { - .procname = "mqueue", - .mode = 0555, - .child = mq_sysctls, - }, - { .ctl_name = 0 } -}; - -static ctl_table mq_sysctl_root[] = { - { - .ctl_name = CTL_FS, - .procname = "fs", - .mode = 0555, - .child = mq_sysctl_dir, - }, - { .ctl_name = 0 } -}; - static int __init init_mqueue_fs(void) { int error; @@ -1336,7 +1273,7 @@ static int __init init_mqueue_fs(void) return -ENOMEM; /* ignore failues - they are not fatal */ - mq_sysctl_table = register_sysctl_table(mq_sysctl_root); + mq_sysctl_table = mq_register_sysctl_table(); error = register_filesystem(&mqueue_fs_type); if (error) -- cgit v1.2.3-55-g7522 From 5d7d18f5bc507b60d3d8967e2739d5e6ffdd630f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Mar 2009 11:59:07 -0800 Subject: kbuild: make it possible for the linker to discard local symbols from vmlinux Make it possible for the linker to discard local symbols from vmlinux as they cause vmlinux to balloon when CONFIG_KALLSYMS=y and they cause dump_stack() and get_wchan() to produce useless information under some circumstances. With this we add a config option (CONFIG_STRIP_ASM_SYMS) that will cause the build to supply -X to the linker to tell it to strip temporary local symbols. This doesn't seem to cause gdb any problems. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- Makefile | 4 ++++ init/Kconfig | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'init') diff --git a/Makefile b/Makefile index 3e95d454285c..ad830bd45a4b 100644 --- a/Makefile +++ b/Makefile @@ -597,6 +597,10 @@ LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID) LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID) +ifeq ($(CONFIG_STRIP_ASM_SYMS),y) +LDFLAGS_vmlinux += -X +endif + # Default kernel image to build when no specific target is given. # KBUILD_IMAGE may be overruled on the command line or # set in the environment diff --git a/init/Kconfig b/init/Kconfig index f2f9b5362b48..7be4d3836745 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -808,6 +808,14 @@ config KALLSYMS_EXTRA_PASS you wait for kallsyms to be fixed. +config STRIP_ASM_SYMS + bool "Strip assembler-generated symbols during link" + default n + help + Strip internal assembler-generated symbols during a link (symbols + that look like '.Lxxx') so they don't pollute the output of + get_wchan() and suchlike. + config HOTPLUG bool "Support for hot-pluggable devices" if EMBEDDED default y -- cgit v1.2.3-55-g7522 From b52bb3712a64c404846f30300b339cfd01e316be Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 13 Apr 2009 14:39:38 -0700 Subject: init/initramfs: fix warning with CONFIG_BLK_DEV_RAM=n init/initramfs.c:520: warning: 'clean_rootfs' defined but not used Signed-off-by: Nikanth Karthikesan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'init') diff --git a/init/initramfs.c b/init/initramfs.c index 80cd713f6cc5..e44f2d932cc4 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -515,6 +515,7 @@ skip: initrd_end = 0; } +#ifdef CONFIG_BLK_DEV_RAM #define BUF_SIZE 1024 static void __init clean_rootfs(void) { @@ -561,6 +562,7 @@ static void __init clean_rootfs(void) sys_close(fd); kfree(buf); } +#endif static int __init populate_rootfs(void) { -- cgit v1.2.3-55-g7522 From d20d5a7456d57d8affa88f45f27ae96ea49c29e4 Mon Sep 17 00:00:00 2001 From: Randy Robertson Date: Mon, 13 Apr 2009 14:40:04 -0700 Subject: initramfs: fix initramfs to work with hardlinked init Change cb6ff208076b5f434db1b8c983429269d719cef5 ("NOMMU: Support XIP on initramfs") seems to have broken booting from initramfs with /sbin/init being a hardlink. It seems like the logic required for XIP on nommu, i.e. ftruncate to reported cpio header file size (body_len) is broken for hardlinks, which have a reported size of 0, and the truncate thus nukes the contents of the file (in my case busybox), making boot impossible and ending with runaway loop modprobe binfmt-0000 - and of course 0000 is not a valid binary format. My fix is to only call ftruncate if size is non-zero which fixes things for me, but I'm not certain whether this will break XIP for those files on nommu systems, although I would guess not. Signed-off-by: Randy Robertson Acked-by: David Howells Acked-by: Paul Mundt Acked-by: H. Peter Anvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'init') diff --git a/init/initramfs.c b/init/initramfs.c index e44f2d932cc4..9ee7b7810417 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -310,7 +310,8 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); - sys_ftruncate(wfd, body_len); + if (body_len) + sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } -- cgit v1.2.3-55-g7522