From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:34:06 -0400 Subject: ->encode_fh() API change pass inode + parent's inode or NULL instead of dentry + bool saying whether we want the parent or not. NOTE: that needs ceph fix folded in. Signed-off-by: Al Viro --- include/linux/exportfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5322dc..12291a7ee275 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); + int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, -- cgit v1.2.3-55-g7522 From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 May 2012 13:29:45 +0930 Subject: lglock: remove online variants of lock Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. 
Signed-off-by: Rusty Russell Acked-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 58 ++------------------------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402ccec55..0fdd821e77b7 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - 
spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ -- cgit v1.2.3-55-g7522 From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:24 +0930 Subject: brlocks/lglocks: turn into functions lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. 
Since there are at least two users it makes sense to share this code in a library. This is also easier to maintain than a macro forest. This will also make it possible later to dynamically allocate lglocks and also use them in modules (both would still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro --- fs/file_table.c | 1 - fs/internal.h | 2 +- include/linux/lglock.h | 125 ++++++++++--------------------------------------- kernel/Makefile | 2 +- kernel/lglock.c | 89 +++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 102 deletions(-) create mode 100644 kernel/lglock.c (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index 70f2a0fd6aec..f5c67c59ec10 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h index 9962c59ba280..8040af489c78 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0fdd821e77b7..f01e5f6d1f07 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include #include #include +#include /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define
br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, 
_THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void 
lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b7..296132c19a57 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 index 000000000000..6535a667a5a7 --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include +#include +#include +#include + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = 
per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); -- cgit v1.2.3-55-g7522 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- arch/alpha/include/asm/posix_types.h | 3 --- arch/arm/include/asm/posix_types.h | 3 --- arch/avr32/include/asm/posix_types.h | 3 --- arch/blackfin/include/asm/posix_types.h | 3 --- arch/cris/include/asm/posix_types.h | 3 --- arch/frv/include/asm/posix_types.h | 3 --- arch/h8300/include/asm/posix_types.h | 3 --- arch/ia64/include/asm/posix_types.h | 3 --- arch/m32r/include/asm/posix_types.h | 3 --- arch/m68k/include/asm/posix_types.h | 3 --- arch/mips/include/asm/posix_types.h | 5 ----- arch/mn10300/include/asm/posix_types.h | 3 --- arch/parisc/include/asm/posix_types.h | 3 --- arch/powerpc/include/asm/posix_types.h | 3 --- arch/s390/include/asm/posix_types.h | 3 --- arch/sh/include/asm/posix_types_32.h | 2 -- arch/sh/include/asm/posix_types_64.h | 2 -- arch/sparc/include/asm/posix_types.h | 5 ----- arch/tile/include/asm/compat.h | 1 - arch/x86/include/asm/posix_types_32.h | 3 --- include/asm-generic/posix_types.h | 4 ---- include/linux/types.h | 2 +- 22 files changed, 1 insertion(+), 65 deletions(-) (limited to 'include/linux') diff --git 
a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h index 24779fc95994..5a8a48320efe 100644 --- a/arch/alpha/include/asm/posix_types.h +++ b/arch/alpha/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned int __kernel_ino_t; #define __kernel_ino_t __kernel_ino_t -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h index efdf99045d87..d2de9cbbcd9b 100644 --- a/arch/arm/include/asm/posix_types.h +++ b/arch/arm/include/asm/posix_types.h @@ -22,9 +22,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h index 74667bfc88cc..9ba9e749b3f3 100644 --- a/arch/avr32/include/asm/posix_types.h +++ b/arch/avr32/include/asm/posix_types.h @@ -17,9 +17,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h index 41bc1875c4d7..1bd3436db6a7 100644 --- a/arch/blackfin/include/asm/posix_types.h +++ b/arch/blackfin/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned int __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/cris/include/asm/posix_types.h 
b/arch/cris/include/asm/posix_types.h index 234891c74e2b..ce4e51793151 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/asm/posix_types.h @@ -15,9 +15,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h index 3f34cb45fbb3..fe512af74a5a 100644 --- a/arch/frv/include/asm/posix_types.h +++ b/arch/frv/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h index bc4c34efb1ad..91e62ba4c7b0 100644 --- a/arch/h8300/include/asm/posix_types.h +++ b/arch/h8300/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h index 7323ab9467eb..99ee1d6510cf 100644 --- a/arch/ia64/include/asm/posix_types.h +++ b/arch/ia64/include/asm/posix_types.h @@ -1,9 +1,6 @@ #ifndef _ASM_IA64_POSIX_TYPES_H #define _ASM_IA64_POSIX_TYPES_H -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h index 0195850e1f88..236de26a409b 100644 --- 
a/arch/m32r/include/asm/posix_types.h +++ b/arch/m32r/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h index 6373093be72b..cf4dbf70fdc7 100644 --- a/arch/m68k/include/asm/posix_types.h +++ b/arch/m68k/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h index e0308dcca135..fa03ec3fbf89 100644 --- a/arch/mips/include/asm/posix_types.h +++ b/arch/mips/include/asm/posix_types.h @@ -17,11 +17,6 @@ * assume GCC is being used. 
*/ -#if (_MIPS_SZLONG == 64) -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t -#endif - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h index ab506181ec31..d31eeea480cf 100644 --- a/arch/mn10300/include/asm/posix_types.h +++ b/arch/mn10300/include/asm/posix_types.h @@ -20,9 +20,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h index 5212b0357daf..b9344256f76b 100644 --- a/arch/parisc/include/asm/posix_types.h +++ b/arch/parisc/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h index f1393252bbda..2958c5b97b2d 100644 --- a/arch/powerpc/include/asm/posix_types.h +++ b/arch/powerpc/include/asm/posix_types.h @@ -16,9 +16,6 @@ typedef int __kernel_ssize_t; typedef long __kernel_ptrdiff_t; #define __kernel_size_t __kernel_size_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h index edf8527ff08d..7be104c0f192 100644 --- a/arch/s390/include/asm/posix_types.h +++ b/arch/s390/include/asm/posix_types.h @@ -24,7 +24,6 @@ typedef unsigned short __kernel_old_dev_t; typedef unsigned long 
__kernel_ino_t; typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; @@ -35,7 +34,6 @@ typedef int __kernel_ptrdiff_t; typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; -typedef unsigned int __kernel_nlink_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; typedef unsigned int __kernel_gid_t; @@ -47,7 +45,6 @@ typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t -#define __kernel_nlink_t __kernel_nlink_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t #define __kernel_uid_t __kernel_uid_t #define __kernel_gid_t __kernel_gid_t diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h index abda58467ece..ba0bdc423b07 100644 --- a/arch/sh/include/asm/posix_types_32.h +++ b/arch/sh/include/asm/posix_types_32.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h index fcda07b4a616..244f7e950e17 100644 --- a/arch/sh/include/asm/posix_types_64.h +++ b/arch/sh/include/asm/posix_types_64.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h index 3070f25ae90a..156220ed99eb 100644 --- 
a/arch/sparc/include/asm/posix_types.h +++ b/arch/sparc/include/asm/posix_types.h @@ -9,8 +9,6 @@ #if defined(__sparc__) && defined(__arch64__) /* sparc 64 bit */ -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; @@ -38,9 +36,6 @@ typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 69adc08d36a5..6e74450ff0a1 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index 99f262e04b91..8e525059e7d8 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd4dde3..fe74fccf18db 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef 
__kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db60231..9c1bd539ea70 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __kernel_nlink_t nlink_t; +typedef __u32 nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; -- cgit v1.2.3-55-g7522 From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 May 2012 11:02:24 -0700 Subject: fsnotify: handle subfiles' perm events While working on fanotify recently, I found the following strange behavior. I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY all events notified. fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is blocked as expected. But, fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo aaa It's not blocked anymore. This is confusing behavior. Also, judging from commit "fsnotify: call fsnotify_parent in perm events", it seems that fsnotify should handle subfiles' perm events as well as the other notify events. With this patch, regardless of whether FAN_ALL_EVENTS is set: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is now blocked properly. FS_OPEN_PERM and FS_ACCESS_PERM are not listed in FS_EVENTS_POSS_ON_CHILD. Due to the fsnotify_inode_watches_children() check, if you specify only these events as fsnotify_mask, you don't get subfiles' perm events notified. This patch adds the events to FS_EVENTS_POSS_ON_CHILD to get them notified even if only these events are specified in fsnotify_mask.
Signed-off-by: Naohiro Aota Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a34ef3..63d966d5c2ea 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) -- cgit v1.2.3-55-g7522 From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:11:37 -0400 Subject: split cap_mmap_addr() out of cap_file_mmap() ... switch callers. Signed-off-by: Al Viro --- include/linux/security.h | 3 ++- security/apparmor/lsm.c | 2 +- security/commoncap.c | 32 +++++++++++++++++++++++--------- security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091ce5fa..4ad59c9fa731 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_mmap_addr(unsigned long addr); extern int cap_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only); @@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned 
long addr_only) { - return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + return cap_mmap_addr(addr); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 032daab449b0..8430d8937afb 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -497,7 +497,7 @@ static int apparmor_file_mmap(struct file *file, unsigned long reqprot, int rc = 0; /* do DAC check */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/commoncap.c b/security/commoncap.c index e771cb1b2d79..ebac3618896e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) } /* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused + * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped - * @addr_only: unused * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. Returns 0 if this mapping should be allowed * -EPERM if not. */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_addr(unsigned long addr) { int ret = 0; @@ -986,3 +979,24 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, } return ret; } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below dac_mmap_min_addr they need + * CAP_SYS_RAWIO. 
The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + return cap_mmap_addr(addr); +} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fa2341b68331..25c125eaa3d8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3104,7 +3104,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d583c0545808..a62197718768 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1199,7 +1199,7 @@ static int smack_file_mmap(struct file *file, int rc; /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; -- cgit v1.2.3-55-g7522 From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:30:51 -0400 Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file() ... i.e. file-dependent and address-dependent checks. 
Signed-off-by: Al Viro --- fs/exec.c | 4 ---- include/linux/security.h | 36 ++++++++++++++++++++---------------- mm/mmap.c | 12 ++++++++---- mm/mremap.c | 4 ++-- mm/nommu.c | 5 ++++- security/apparmor/lsm.c | 15 ++++----------- security/capability.c | 3 ++- security/commoncap.c | 21 +++------------------ security/security.c | 12 ++++++++---- security/selinux/hooks.c | 15 ++++++++------- security/smack/smack_lsm.c | 15 +++++---------- 11 files changed, 64 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 52c9e2ff6e6b..a79786a8d2c8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); - err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); - if (err) - goto err; - err = insert_vm_struct(mm, vma); if (err) goto err; diff --git a/include/linux/security.h b/include/linux/security.h index 4ad59c9fa731..f1bae0963ddc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_mmap_addr(unsigned long addr); -extern int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +extern int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. 
When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @file_mmap : + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). * @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. - * @addr contains virtual address that will be used for the operation. - * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1482,10 +1483,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_mmap) (struct file *file, + int (*mmap_addr) (unsigned long addr); + int (*mmap_file) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only); + unsigned long flags); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); +int 
security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_mmap(struct file *file, unsigned long reqprot, +static inline int security_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, - unsigned long addr, - unsigned long addr_only) + unsigned long flags) +{ + return 0; +} + +static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); } diff --git a/mm/mmap.c b/mm/mmap.c index 83c56624f1f6..49283da9a2ae 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1101,7 +1101,11 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + error = security_mmap_addr(addr); + if (error) + return error; + + error = security_mmap_file(file, reqprot, prot, flags); if (error) return error; @@ -1817,7 +1821,7 @@ int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_file_mmap(NULL, 0, 0, 0, address, 1); + error = security_mmap_addr(address); if (error) return error; @@ -2205,7 +2209,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_file_mmap(NULL, 0, 0, 0, addr, 1); + error = security_mmap_addr(addr); if (error) return error; @@ -2561,7 +2565,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + ret = security_mmap_addr(vma->vm_start); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index 169c53b87749..ebf10892b63d 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,7 +371,7 @@ static unsigned long 
mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; @@ -532,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); diff --git a/mm/nommu.c b/mm/nommu.c index de6084e3a046..acfe419785db 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1047,7 +1047,10 @@ static int validate_mmap_request(struct file *file, } /* allow the security API to have its say */ - ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); + ret = security_mmap_addr(addr); + if (ret < 0) + return ret; + ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8430d8937afb..8ea39aabe948 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot, return common_file_perm(op, file, mask); } -static int apparmor_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int apparmor_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - int rc = 0; - - /* do DAC check */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - return common_mmap(OP_FMMAP, file, prot, flags); } @@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = { .file_permission = apparmor_file_permission, .file_alloc_security = apparmor_file_alloc_security, .file_free_security = apparmor_file_free_security, - .file_mmap = apparmor_file_mmap, + .mmap_file = apparmor_mmap_file, + .mmap_addr = cap_mmap_addr, .file_mprotect = 
apparmor_file_mprotect, .file_lock = apparmor_file_lock, diff --git a/security/capability.c b/security/capability.c index fca889676c5e..61095df8b89a 100644 --- a/security/capability.c +++ b/security/capability.c @@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, file_mmap); + set_to_cap_if_null(ops, mmap_addr); + set_to_cap_if_null(ops, mmap_file); set_to_cap_if_null(ops, file_mprotect); set_to_cap_if_null(ops, file_lock); set_to_cap_if_null(ops, file_fcntl); diff --git a/security/commoncap.c b/security/commoncap.c index ebac3618896e..6dbae4650abe 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -980,23 +980,8 @@ int cap_mmap_addr(unsigned long addr) return ret; } -/* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused - * @addr: address attempting to be mapped - * @addr_only: unused - * - * If the process is attempting to map memory below dac_mmap_min_addr they need - * CAP_SYS_RAWIO. The other parameters to this function are unused by the - * capability security module. Returns 0 if this mapping should be allowed - * -EPERM if not. 
- */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - return cap_mmap_addr(addr); + return 0; } diff --git a/security/security.c b/security/security.c index 5497a57fba01..d91c66d3956b 100644 --- a/security/security.c +++ b/security/security.c @@ -657,18 +657,22 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { int ret; - ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; return ima_file_mmap(file, prot); } +int security_mmap_addr(unsigned long addr) +{ + return security_ops->mmap_addr(addr); +} + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 25c125eaa3d8..372ec6502aa8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3083,9 +3083,7 @@ error: return rc; } -static int selinux_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int selinux_mmap_addr(unsigned long addr) { int rc = 0; u32 sid = current_sid(); @@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; + return cap_mmap_addr(addr); +} +static int 
selinux_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +{ if (selinux_checkreqprot) prot = reqprot; @@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = { .file_alloc_security = selinux_file_alloc_security, .file_free_security = selinux_file_free_security, .file_ioctl = selinux_file_ioctl, - .file_mmap = selinux_file_mmap, + .mmap_file = selinux_mmap_file, + .mmap_addr = selinux_mmap_addr, .file_mprotect = selinux_file_mprotect, .file_lock = selinux_file_lock, .file_fcntl = selinux_file_fcntl, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a62197718768..ee0bb5735f35 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_file_mmap : + * smack_mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, * @flags contains the operational flags. * Return 0 if permission is granted. 
*/ -static int smack_file_mmap(struct file *file, +static int smack_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only) + unsigned long flags) { struct smack_known *skp; struct smack_rule *srp; @@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file, int tmay; int rc; - /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - if (file == NULL || file->f_dentry == NULL) return 0; @@ -3482,7 +3476,8 @@ struct security_operations smack_ops = { .file_ioctl = smack_file_ioctl, .file_lock = smack_file_lock, .file_fcntl = smack_file_fcntl, - .file_mmap = smack_file_mmap, + .mmap_file = smack_mmap_file, + .mmap_addr = cap_mmap_addr, .file_set_fowner = smack_file_set_fowner, .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, -- cgit v1.2.3-55-g7522 From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:11:23 -0400 Subject: take security_mmap_file() outside of ->mmap_sem Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- ipc/shm.c | 5 +++++ mm/mmap.c | 23 ++++++++++++----------- mm/nommu.c | 22 ++++++++++++---------- security/security.c | 33 ++++++++++++++++++++++++++++++--- 5 files changed, 62 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f1bae0963ddc..4e5a73cdbbef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +int security_mmap_file(struct file *file, unsigned 
long prot, + unsigned long flags); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, +static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 406c5b208193..e3a8063b1768 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) sfd->file = shp->shm_file; sfd->vm_ops = NULL; + err = security_mmap_file(file, prot, flags); + if (err) + goto out_fput; + down_write(&current->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; @@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) invalid: up_write(&current->mm->mmap_sem); +out_fput: fput(file); out_nattch: diff --git a/mm/mmap.c b/mm/mmap.c index 49283da9a2ae..34b280f4238d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -979,7 +979,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct inode *inode; vm_flags_t vm_flags; int error; - unsigned long reqprot = prot; /* * Does the application expect PROT_READ to imply PROT_EXEC?
@@ -1105,10 +1104,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (error) return error; - error = security_mmap_file(file, reqprot, prot, flags); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1130,9 +1125,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1168,9 +1166,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + retval = security_mmap_file(file, prot, flags); + if (!retval) { + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + } if (file) fput(file); diff --git a/mm/nommu.c b/mm/nommu.c index acfe419785db..8cbfd623b04a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file, unsigned long *_capabilities) { unsigned long capabilities, rlen; - unsigned long reqprot = prot; int ret; /* do the simple checks first */ @@ -1048,9 +1047,6 @@ static int validate_mmap_request(struct file *file, /* allow the security API to have its say */ ret = security_mmap_addr(addr); - if (ret < 0) - return ret; - ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; @@ -1492,9 +1488,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len,
prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1515,9 +1514,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + ret = security_mmap_file(file, prot, flags); + if (!ret) { + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + } if (file) fput(file); diff --git a/security/security.c b/security/security.c index d91c66d3956b..3b11b3b72fe2 100644 --- a/security/security.c +++ b/security/security.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -657,11 +660,35 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) { + unsigned long reqprot = prot; int ret; - + /* + * Does the application expect PROT_READ to imply PROT_EXEC? + * + * (the exception is when the underlying filesystem is noexec + * mounted, in which case we dont add PROT_EXEC.)
+ */ + if (!(reqprot & PROT_READ)) + goto out; + if (!(current->personality & READ_IMPLIES_EXEC)) + goto out; + if (!file) { + prot |= PROT_EXEC; + } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { +#ifndef CONFIG_MMU + unsigned long caps = 0; + struct address_space *mapping = file->f_mapping; + if (mapping && mapping->backing_dev_info) + caps = mapping->backing_dev_info->capabilities; + if (!(caps & BDI_CAP_EXEC_MAP)) + goto out; +#endif + prot |= PROT_EXEC; + } +out: ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; -- cgit v1.2.3-55-g7522 From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:08:42 -0400 Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static after all, 0 bytes and 0 pages is the same thing... Signed-off-by: Al Viro --- fs/aio.c | 6 +++--- include/linux/mm.h | 2 +- ipc/shm.c | 2 +- mm/mmap.c | 4 ++-- mm/nommu.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index e7f2fad7b4ce..07154d99cc67 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->mmap_size = nr_pages * PAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, - 0); + info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f24c63..4189e0d0ac05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, 
unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap(struct file *, unsigned long, +extern unsigned long do_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); diff --git a/ipc/shm.c b/ipc/shm.c index e3a8063b1768..5e2cbfdab6fc 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1054,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = do_mmap (file, addr, size, prot, flags, 0); + user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); *raddr = user_addr; err = 0; if (IS_ERR_VALUE(user_addr)) diff --git a/mm/mmap.c b/mm/mmap.c index 131521e12f13..f7786542c59d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -971,7 +971,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) * The caller must hold down_write(&current->mm->mmap_sem). */ -static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -1102,7 +1102,7 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { diff --git a/mm/nommu.c b/mm/nommu.c index 8cbfd623b04a..a1792ed2cb1a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1232,7 +1232,7 @@ enomem: /* * handle mapping creation for uClinux */ -static unsigned long do_mmap_pgoff(struct file *file, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1470,7 +1470,7 @@ error_getting_region: return -ENOMEM; } -unsigned long do_mmap(struct
file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { -- cgit v1.2.3-55-g7522 From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:59:21 -0400 Subject: fs: introduce inode operation ->update_time Btrfs has to make sure we have space to allocate new blocks in order to modify the inode, so updating time can fail. We've gotten around this by having our own file_update_time but this is kind of a pain, and Christoph has indicated he would like to make xfs do something different with atime updates. So introduce ->update_time, where we will deal with i_version an a/m/c time updates and indicate which changes need to be made. The normal version just does what it has always done, updates the time and marks the inode dirty, and then filesystems can choose to do something different. I've gone through all of the users of file_update_time and made them check for errors with the exception of the fault code since it's complicated and I wasn't quite sure what to do there, also Jan is going to be pushing the file time updates into page_mkwrite for those who have it so that should satisfy btrfs and make it not a big deal to check the file_update_time() return code in the generic fault path. 
Thanks, Signed-off-by: Josef Bacik --- Documentation/filesystems/Locking | 3 +++ Documentation/filesystems/vfs.txt | 4 +++ fs/fuse/file.c | 4 ++- fs/inode.c | 56 ++++++++++++++++++++++++++++----------- fs/ncpfs/file.c | 6 +++-- fs/ntfs/file.c | 4 ++- fs/pipe.c | 7 +++-- fs/splice.c | 6 +++-- fs/xfs/xfs_file.c | 7 +++-- include/linux/fs.h | 10 ++++++- mm/filemap.c | 4 ++- mm/filemap_xip.c | 4 ++- 12 files changed, 86 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4fca82e5276e..d5a269a51a9e 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + void (*update_time)(struct inode *, struct timespec *, int); locking rules: all may block @@ -89,6 +90,8 @@ listxattr: no removexattr: yes truncate_range: yes fiemap: no +update_time: no + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 0d0492028082..b2aa722e5ea2 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + void (*update_time)(struct inode *, struct timespec *, int); }; Again, all methods are called without any locks being held, unless @@ -475,6 +476,9 @@ otherwise noted. truncate_range: a method provided by the underlying filesystem to truncate a range of blocks , i.e. punch a hole somewhere in a file. 
+ update_time: called by the VFS to update a specific time or the i_version of + an inode. If this is not defined the VFS will update the inode itself + and call mark_inode_dirty_sync. The Address Space Object ======================== diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b7fd75..9562109d3a87 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; if (file->f_flags & O_DIRECT) { written = generic_file_direct_write(iocb, iov, &nr_segs, diff --git a/fs/inode.c b/fs/inode.c index a79555e492e0..f0335fc315ed 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1487,6 +1487,27 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } +/* + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this. + */ +static int update_time(struct inode *inode, struct timespec *time, int flags) +{ + if (inode->i_op->update_time) + return inode->i_op->update_time(inode, time, flags); + + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_VERSION) + inode_inc_iversion(inode); + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + mark_inode_dirty_sync(inode); + return 0; +} + /** * touch_atime - update the access time * @path: the &struct path to update @@ -1524,8 +1545,14 @@ void touch_atime(struct path *path) if (mnt_want_write(mnt)) return; - inode->i_atime = now; - mark_inode_dirty_sync(inode); + /* + * File systems can error out when updating inodes if they need to + * allocate new space to modify an inode (such is the case for + * Btrfs), but since we touch atime while walking down the path we + * really don't care if we failed to update the atime of the file, + * so just ignore the return value. 
+ */ + update_time(inode, &now, S_ATIME); mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1604,18 +1631,20 @@ EXPORT_SYMBOL(file_remove_suid); * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. + * timestamps are handled by the server. This can return an error for + * file systems who need to allocate space in order to update an inode. */ -void file_update_time(struct file *file) +int file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + int sync_it = 0; + int ret; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) - return; + return 0; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) @@ -1628,21 +1657,16 @@ void file_update_time(struct file *file) sync_it |= S_VERSION; if (!sync_it) - return; + return 0; /* Finally allowed to write? Takes lock. 
*/ if (mnt_want_write_file(file)) - return; + return 0; - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) - inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - mark_inode_dirty_sync(inode); + ret = update_time(inode, &now, sync_it); mnt_drop_write_file(file); + + return ret; } EXPORT_SYMBOL(file_update_time); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3ff5fcc1528f..122e260247f5 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * already_written = 0; + errno = file_update_time(file); + if (errno) + goto outrel; + bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ @@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - file_update_time(file); - *ppos = pos; if (pos > i_size_read(inode)) { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8639169221c7..7389d2d5e51d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = file_remove_suid(file); if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: diff --git a/fs/pipe.c b/fs/pipe.c index 95ebb56de494..49c1065256fd 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -654,8 +654,11 @@ out: wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) - file_update_time(filp); + if (ret > 0) { + int err = file_update_time(filp); + if (err) + ret = err; + } return ret; } diff --git a/fs/splice.c b/fs/splice.c index f8476841eb04..47c4c1ad0c04 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1003,8 +1003,10 @@ generic_file_splice_write(struct 
pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) { - file_update_time(out); - ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + ret = file_update_time(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, + pipe_to_file); } mutex_unlock(&inode->i_mutex); } while (ret > 0); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d214b87f6bb..9f7ec15a6522 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -586,8 +586,11 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); + if (likely(!(file->f_mode & FMODE_NOCMTIME))) { + error = file_update_time(file); + if (error) + return error; + } /* * If we're writing the file then make sure to clear the setuid and diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a9630948..57fc70574d20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1684,6 +1684,7 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } +enum file_time_flags { + S_ATIME = 1, + S_MTIME = 2, + S_CTIME = 4, + S_VERSION = 8, +}; + extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern void file_update_time(struct file *file); +extern int file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, 
struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); diff --git a/mm/filemap.c b/mm/filemap.c index 21e5abfbcdf6..51070f1f1b52 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2463,7 +2463,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a4eb31132229..213ca1f53409 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - file_update_time(filp); + ret = file_update_time(filp); + if (ret) + goto out_backing; ret = __xip_file_write (filp, buf, count, pos, ppos); -- cgit v1.2.3-55-g7522 From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:19 +0200 Subject: vfs: retry last component if opening stale dentry NFS optimizes away d_revalidates for last component of open. This means that open itself can find the dentry stale. This patch allows the filesystem to return EOPENSTALE and the VFS will retry the lookup on just the last component if possible. If the lookup was done using RCU mode, including the last component, then this is not possible since the parent dentry is lost. In this case fall back to non-RCU lookup. Currently this is not used since NFS will always leave RCU mode. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 37 +++++++++++++++++++++++++++++++++++-- include/linux/errno.h | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 998d5316921a..7d694194024a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2202,6 +2202,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, struct file *filp; struct inode *inode; int symlink_ok = 0; + struct path save_parent = { .dentry = NULL, .mnt = NULL }; + bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2267,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) goto exit; +retry_lookup: mutex_lock(&dir->d_inode->i_mutex); dentry = lookup_hash(nd); @@ -2349,12 +2352,21 @@ finish_lookup: return NULL; } - path_to_nameidata(path, nd); + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { + path_to_nameidata(path, nd); + } else { + save_parent.dentry = nd->path.dentry; + save_parent.mnt = mntget(path->mnt); + nd->path.dentry = path->dentry; + + } nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... 
*/ error = complete_walk(nd); - if (error) + if (error) { + path_put(&save_parent); return ERR_PTR(error); + } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2377,6 +2389,20 @@ common: if (error) goto exit; filp = nameidata_to_filp(nd); + if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + retried = true; + goto retry_lookup; + } if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { @@ -2396,6 +2422,7 @@ common: out: if (want_write) mnt_drop_write(nd->path.mnt); + path_put(&save_parent); terminate_walk(nd); return filp; @@ -2459,6 +2486,12 @@ out: if (base) fput(base); release_open_intent(nd); + if (filp == ERR_PTR(-EOPENSTALE)) { + if (flags & LOOKUP_RCU) + filp = ERR_PTR(-ECHILD); + else + filp = ERR_PTR(-ESTALE); + } return filp; out_filp: diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa5c262..e0de516374da 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -17,6 +17,7 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ -- cgit v1.2.3-55-g7522