From c643401218be0f4ab3522e0c0a63016596d6e9ca Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 17 Nov 2017 15:26:45 -0800 Subject: proc, coredump: add CoreDumping flag to /proc/pid/status Right now there is no convenient way to check if a process is being coredumped at the moment. It might be necessary to recognize such state to prevent killing the process and getting a broken coredump. Writing a large core might take significant time, and the process is unresponsive during it, so it might be killed by timeout, if another process is monitoring and killing/restarting hanging tasks. We're getting a significant number of corrupted coredump files on machines in our fleet, just because processes are being killed by timeout in the middle of the core writing process. We do have a process health check, and some agent is responsible for restarting processes which are not responding for health check requests. Writing a large coredump to the disk can easily exceed the reasonable timeout (especially on an overloaded machine). This flag will allow the agent to distinguish processes which are being coredumped, extend the timeout for them, and let them produce a full coredump file. To provide an ability to detect if a process is in the state of being coredumped, we can expose a boolean CoreDumping flag in /proc/pid/status. Example: $ cat core.sh #!/bin/sh echo "|/usr/bin/sleep 10" > /proc/sys/kernel/core_pattern sleep 1000 & PID=$! cat /proc/$PID/status | grep CoreDumping kill -ABRT $PID sleep 1 cat /proc/$PID/status | grep CoreDumping $ ./core.sh CoreDumping: 0 CoreDumping: 1 [guro@fb.com: document CoreDumping flag in /proc//status] Link: http://lkml.kernel.org/r/20170928135357.GA8470@castle.DHCP.thefacebook.com Link: http://lkml.kernel.org/r/20170920230634.31572-1-guro@fb.com Signed-off-by: Roman Gushchin Cc: Alexander Viro Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ec571b9bb18a..2a84bb334894 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -181,6 +181,7 @@ read the file /proc/PID/status: VmPTE: 20 kb VmSwap: 0 kB HugetlbPages: 0 kB + CoreDumping: 0 Threads: 1 SigQ: 0/28578 SigPnd: 0000000000000000 @@ -253,6 +254,8 @@ Table 1-2: Contents of the status files (as of 4.8) VmSwap amount of swap used by anonymous private data (shmem swap usage is not included) HugetlbPages size of hugetlb memory portions + CoreDumping process's memory is currently being dumped + (killing the process may lead to a corrupted core) Threads number of threads SigQ number of signals queued/max. number for queue SigPnd bitmap of pending signals for the thread -- cgit v1.2.3-55-g7522 From b1fca27d384e8418aac84b39f6f5179aecc1b64f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 15:27:03 -0800 Subject: kernel debug: support resetting WARN*_ONCE I like _ONCE warnings because it's guaranteed that they don't flood the log. During testing I find it useful to reset the state of the once warnings, so that I can rerun tests and see if they trigger again, or can guarantee that a test run always hits the same warnings. This patch adds a debugfs interface to reset all the _ONCE warnings so that they appear again: echo 1 > /sys/kernel/debug/clear_warn_once This is implemented by putting all the warning booleans into a special section, and clearing it. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20171017221455.6740-1-andi@firstfloor.org Signed-off-by: Andi Kleen Tested-by: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/clearing-warn-once.txt | 7 +++++++ include/asm-generic/bug.h | 6 +++--- include/asm-generic/sections.h | 1 + include/asm-generic/vmlinux.lds.h | 3 +++ kernel/panic.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 Documentation/clearing-warn-once.txt (limited to 'Documentation') diff --git a/Documentation/clearing-warn-once.txt b/Documentation/clearing-warn-once.txt new file mode 100644 index 000000000000..5b1f5d547be1 --- /dev/null +++ b/Documentation/clearing-warn-once.txt @@ -0,0 +1,7 @@ + +WARN_ONCE / WARN_ON_ONCE only print a warning once. + +echo 1 > /sys/kernel/debug/clear_warn_once + +clears the state and allows the warnings to print once again. +This can be useful after test suite runs to reproduce problems. diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index af2cc94a61bf..7844b0df88cd 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -130,7 +130,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #ifndef WARN_ON_ONCE #define WARN_ON_ONCE(condition) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -142,7 +142,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #endif #define WARN_ONCE(condition, format...) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -153,7 +153,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, }) #define WARN_TAINT_ONCE(condition, taint, format...) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 6d9576931084..03cc5f9bba71 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -44,6 +44,7 @@ extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __softirqentry_text_start[], __softirqentry_text_end[]; +extern char __start_once[], __end_once[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bdcd1caae092..ee8b707d9fa9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -223,6 +223,9 @@ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ *(.data.unlikely) \ + VMLINUX_SYMBOL(__start_once) = .; \ + *(.data.once) \ + VMLINUX_SYMBOL(__end_once) = .; \ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ diff --git a/kernel/panic.c b/kernel/panic.c index bdd18afa19a4..672a91dc20fe 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -587,6 +589,32 @@ void warn_slowpath_null(const char *file, int line) EXPORT_SYMBOL(warn_slowpath_null); #endif +#ifdef CONFIG_BUG + +/* Support resetting WARN*_ONCE state */ + +static int clear_warn_once_set(void *data, u64 val) +{ + memset(__start_once, 0, __end_once - __start_once); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops, + NULL, + clear_warn_once_set, + "%lld\n"); + +static __init int register_warn_debugfs(void) +{ + /* Don't care about failure */ + debugfs_create_file("clear_warn_once", 0644, NULL, + NULL, &clear_warn_once_fops); + return 0; +} + +device_initcall(register_warn_debugfs); +#endif + #ifdef CONFIG_CC_STACKPROTECTOR /* -- cgit v1.2.3-55-g7522 From 8c188759fb5788579b5975d5a8c9f529e25c2171 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Nov 2017 15:27:39 -0800 Subject: dynamic_debug documentation: minor fixes Fix minor typo. Fix missing words in explaining parsing of last line number. Link: http://lkml.kernel.org/r/ebb7ff42-4945-103f-d5b4-f07a6f3343a7@infradead.org Signed-off-by: Randy Dunlap Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/dynamic-debug-howto.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index 12278a926370..fdf72429f801 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -18,7 +18,7 @@ shortcut for ``print_hex_dump(KERN_DEBUG)``. For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is its ``prefix_str`` argument, if it is constant string; or ``hexdump`` -in case ``prefix_str`` is build dynamically. +in case ``prefix_str`` is built dynamically. Dynamic debug has even more useful features: @@ -197,8 +197,8 @@ line line number matches the callsite line number exactly. A range of line numbers matches any callsite between the first and last line number inclusive. An empty first number means - the first line in the file, an empty line number means the - last number in the file. Examples:: + the first line in the file, an empty last line number means the + last line number in the file. Examples:: line 1603 // exactly line 1603 line 1600-1605 // the six lines from line 1600 to line 1605 -- cgit v1.2.3-55-g7522 From 2743232c0c4c82311718bb4ec1fb659ed64ecf7a Mon Sep 17 00:00:00 2001 From: Kangmin Park Date: Fri, 17 Nov 2017 15:30:23 -0800 Subject: Documentation/sysctl/vm.txt: fix typo Link: http://lkml.kernel.org/r/CAKW4uUyCi=PnKf3epgFVz8z=1tMtHSOHNm+fdNxrNw3-THvRCA@mail.gmail.com Signed-off-by: Kangmin Park Cc: Jiri Kosina Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 055c8b3e1018..b920423f88cb 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -818,7 +818,7 @@ tooling to work, you can do: swappiness This control is used to define how aggressive the kernel will swap -memory pages. Higher values will increase agressiveness, lower values +memory pages. Higher values will increase aggressiveness, lower values decrease the amount of swap. A value of 0 instructs the kernel not to initiate swap until the amount of free and file-backed pages is less than the high water mark in a zone. -- cgit v1.2.3-55-g7522 From c512ac01d8a841033da8ec538a83f80fb0b4d1fe Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:53 -0800 Subject: kcov: update documentation The updated documentation describes new KCOV mode for collecting comparison operands. Link: http://lkml.kernel.org/r/20171011095459.70721-3-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 99 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 44886c91e112..c2f6452e38ed 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -12,19 +12,30 @@ To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is disabled (e.g. scheduler, locking). -Usage ------ +kcov is also able to collect comparison operands from the instrumented code +(this feature currently requires that the kernel is compiled with clang). + +Prerequisites +------------- Configure the kernel with:: CONFIG_KCOV=y CONFIG_KCOV requires gcc built on revision 231296 or later. + +If the comparison operands need to be collected, set:: + + CONFIG_KCOV_ENABLE_COMPARISONS=y + Profiling data will only become accessible once debugfs has been mounted:: mount -t debugfs none /sys/kernel/debug -The following program demonstrates kcov usage from within a test program: +Coverage collection +------------------- +The following program demonstrates coverage collection from within a test +program using kcov: .. code-block:: c @@ -44,6 +55,9 @@ The following program demonstrates kcov usage from within a test program: #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (64<<10) + #define KCOV_TRACE_PC 0 + #define KCOV_TRACE_CMP 1 + int main(int argc, char **argv) { int fd; @@ -64,7 +78,7 @@ The following program demonstrates kcov usage from within a test program: if ((void*)cover == MAP_FAILED) perror("mmap"), exit(1); /* Enable coverage collection on the current thread. */ - if (ioctl(fd, KCOV_ENABLE, 0)) + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC)) perror("ioctl"), exit(1); /* Reset coverage from the tail of the ioctl() call. */ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); @@ -111,3 +125,80 @@ The interface is fine-grained to allow efficient forking of test processes. That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode, mmaps coverage buffer and then forks child processes in a loop. Child processes only need to enable coverage (disable happens automatically on thread end). + +Comparison operands collection +------------------------------ +Comparison operands collection is similar to coverage collection: + +.. code-block:: c + + /* Same includes and defines as above. */ + + /* Number of 64-bit words per record. */ + #define KCOV_WORDS_PER_CMP 4 + + /* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ + + #define KCOV_CMP_CONST (1 << 0) + #define KCOV_CMP_SIZE(n) ((n) << 1) + #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + + int main(int argc, char **argv) + { + int fd; + uint64_t *cover, type, arg1, arg2, is_const, size; + unsigned long n, i; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + /* + * Note that the buffer pointer is of type uint64_t*, because all + * the comparison operands are promoted to uint64_t. + */ + cover = (uint64_t *)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + /* Note KCOV_TRACE_CMP instead of KCOV_TRACE_PC. */ + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_CMP)) + perror("ioctl"), exit(1); + __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); + read(-1, NULL, 0); + /* Read number of comparisons collected. */ + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) { + type = cover[i * KCOV_WORDS_PER_CMP + 1]; + /* arg1 and arg2 - operands of the comparison. */ + arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; + arg2 = cover[i * KCOV_WORDS_PER_CMP + 3]; + /* ip - caller address. */ + ip = cover[i * KCOV_WORDS_PER_CMP + 4]; + /* size of the operands. */ + size = 1 << ((type & KCOV_CMP_MASK) >> 1); + /* is_const - true if either operand is a compile-time constant.*/ + is_const = type & KCOV_CMP_CONST; + printf("ip: 0x%lx type: 0x%lx, arg1: 0x%lx, arg2: 0x%lx, " + "size: %lu, %s\n", + ip, type, arg1, arg2, size, + is_const ? "const" : "non-const"); + } + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + /* Free resources. */ + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } + +Note that the kcov modes (coverage collection or comparison operands) are +mutually exclusive. -- cgit v1.2.3-55-g7522