From d3a23e1678a5827c38ed8a465ad91d65e59fa911 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 5 Jan 2011 20:01:21 +1100 Subject: Revert "fs: use RCU read side protection in d_validate" This reverts commit 3825bdb7ed920845961f32f364454bee5f469abb. You cannot dget() a dentry without having a reference, or holding a lock that guarantees it remains valid. Signed-off-by: Nick Piggin --- fs/dcache.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 23702a9d4e6d..cc2b93802179 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1491,26 +1491,33 @@ out: * This is used by ncpfs in its readdir implementation. * Zero is returned in the dentry is invalid. */ -int d_validate(struct dentry *dentry, struct dentry *parent) + +int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct hlist_head *head = d_hash(parent, dentry->d_name.hash); - struct hlist_node *node; - struct dentry *d; + struct hlist_head *base; + struct hlist_node *lhp; /* Check whether the ptr might be valid at all.. */ if (!kmem_ptr_validate(dentry_cache, dentry)) - return 0; - if (dentry->d_parent != parent) - return 0; + goto out; - rcu_read_lock(); - hlist_for_each_entry_rcu(d, node, head, d_hash) { - if (d == dentry) { - dget(dentry); + if (dentry->d_parent != dparent) + goto out; + + spin_lock(&dcache_lock); + base = d_hash(dparent, dentry->d_name.hash); + hlist_for_each(lhp,base) { + /* hlist_for_each_entry_rcu() not required for d_hash list + * as it is parsed under dcache_lock + */ + if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { + __dget_locked(dentry); + spin_unlock(&dcache_lock); return 1; } } - rcu_read_unlock(); + spin_unlock(&dcache_lock); +out: return 0; } EXPORT_SYMBOL(d_validate); -- cgit v1.2.3 From 786a5e15b613a9cee4fc9139fc3113a5ab0fde79 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:16 +1100 Subject: fs: d_validate fixes d_validate has been broken for a long time. kmem_ptr_validate does not guarantee that a pointer can be dereferenced if it can go away at any time. Even rcu_read_lock doesn't help, because the pointer might be queued in RCU callbacks but not executed yet. So the parent cannot be checked, nor the name hashed. The dentry pointer can not be touched until it can be verified under lock. Hashing simply cannot be used. Instead, verify the parent/child relationship by traversing parent's d_child list. It's slow, but only ncpfs and the destaged smbfs care about it, at this point. Signed-off-by: Nick Piggin --- fs/dcache.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index cc2b93802179..9d1a59dfda0b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1483,41 +1483,30 @@ out: } /** - * d_validate - verify dentry provided from insecure source + * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. * Zero is returned in the dentry is invalid. + * + * This function is slow for big directories, and deprecated, do not use it. */ - int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct hlist_head *base; - struct hlist_node *lhp; - - /* Check whether the ptr might be valid at all.. */ - if (!kmem_ptr_validate(dentry_cache, dentry)) - goto out; - - if (dentry->d_parent != dparent) - goto out; + struct dentry *child; spin_lock(&dcache_lock); - base = d_hash(dparent, dentry->d_name.hash); - hlist_for_each(lhp,base) { - /* hlist_for_each_entry_rcu() not required for d_hash list - * as it is parsed under dcache_lock - */ - if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { + list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + if (dentry == child) { __dget_locked(dentry); spin_unlock(&dcache_lock); return 1; } } spin_unlock(&dcache_lock); -out: + return 0; } EXPORT_SYMBOL(d_validate); -- cgit v1.2.3 From 86c8749ede0c59e590de9267066932a26f1ce796 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:18 +1100 Subject: vfs: revert per-cpu nr_unused counters for dentry and inodes The nr_unused counters count the number of objects on an LRU, and as such they are synchronized with LRU object insertion and removal and scanning, and protected under the LRU lock. Making it per-cpu does not actually get any concurrency improvements because of this lock, and summing the counter is much slower, and incrementing/decrementing it costs more code size and is slower too. These counters should stay per-LRU, which currently means global. Signed-off-by: Nick Piggin --- fs/dcache.c | 16 +++++----------- fs/inode.c | 17 +++++++---------- 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9d1a59dfda0b..f62ba90bce91 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -68,14 +68,12 @@ struct dentry_stat_t dentry_stat = { }; static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; -static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); - dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -140,7 +138,7 @@ static void dentry_lru_add(struct dentry *dentry) if (list_empty(&dentry->d_lru)) { list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } } @@ -149,7 +147,7 @@ static void dentry_lru_del(struct dentry *dentry) if (!list_empty(&dentry->d_lru)) { list_del_init(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - percpu_counter_dec(&nr_dentry_unused); + dentry_stat.nr_unused--; } } @@ -158,7 +156,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } @@ -546,7 +544,7 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = percpu_counter_sum_positive(&nr_dentry_unused); + int unused = dentry_stat.nr_unused; int prune_ratio; int pruned; @@ -908,16 +906,13 @@ EXPORT_SYMBOL(shrink_dcache_parent); */ static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { - int nr_unused; - if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; prune_dcache(nr); } - nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); - return (nr_unused / 100) * sysctl_vfs_cache_pressure; + return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } static struct shrinker dcache_shrinker = { @@ -2424,7 +2419,6 @@ static void __init dcache_init(void) int loop; percpu_counter_init(&nr_dentry, 0); - percpu_counter_init(&nr_dentry_unused, 0); /* * A constructor could be added for stable state like the lists, diff --git a/fs/inode.c b/fs/inode.c index ae2727ab0c3a..efc43979709f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -103,7 +103,6 @@ static DECLARE_RWSEM(iprune_sem); struct inodes_stat_t inodes_stat; static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; -static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; static struct kmem_cache *inode_cachep __read_mostly; @@ -114,7 +113,7 @@ static inline int get_nr_inodes(void) static inline int get_nr_inodes_unused(void) { - return percpu_counter_sum_positive(&nr_inodes_unused); + return inodes_stat.nr_unused; } int get_nr_dirty_inodes(void) @@ -132,7 +131,6 @@ int proc_nr_inodes(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); - inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -335,7 +333,7 @@ static void inode_lru_list_add(struct inode *inode) { if (list_empty(&inode->i_lru)) { list_add(&inode->i_lru, &inode_lru); - percpu_counter_inc(&nr_inodes_unused); + inodes_stat.nr_unused++; } } @@ -343,7 +341,7 @@ static void inode_lru_list_del(struct inode *inode) { if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } } @@ -513,7 +511,7 @@ void evict_inodes(struct super_block *sb) list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -554,7 +552,7 @@ int invalidate_inodes(struct super_block *sb) list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -616,7 +614,7 @@ static void prune_icache(int nr_to_scan) if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { list_del_init(&inode->i_lru); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; continue; } @@ -650,7 +648,7 @@ static void prune_icache(int nr_to_scan) */ list_move(&inode->i_lru, &freeable); list_del_init(&inode->i_wb_list); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); @@ -1649,7 +1647,6 @@ void __init inode_init(void) init_once); register_shrinker(&icache_shrinker); percpu_counter_init(&nr_inodes, 0); - percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) -- cgit v1.2.3 From 3e880fb5e4bb6a012035e3edd0586ee2817c2e24 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:19 +1100 Subject: fs: use fast counters for vfs caches percpu_counter library generates quite nasty code, so unless you need to dynamically allocate counters or take fast approximate value, a simple per cpu set of counters is much better. The percpu_counter can never be made to work as well, because it has an indirection from pointer to percpu memory, and it can't use direct this_cpu_inc interfaces because it doesn't use static PER_CPU data, so code will always be worse. In the fastpath, it is the difference between this: incl %gs:nr_dentry # nr_dentry and this: movl percpu_counter_batch(%rip), %edx # percpu_counter_batch, movl $1, %esi #, movq $nr_dentry, %rdi #, call __percpu_counter_add # (plus I clobber registers) __percpu_counter_add: pushq %rbp # movq %rsp, %rbp #, subq $32, %rsp #, movq %rbx, -24(%rbp) #, movq %r12, -16(%rbp) #, movq %r13, -8(%rbp) #, movq %rdi, %rbx # fbc, fbc #APP # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1 movq %gs:kernel_stack,%rax #, pfo_ret__ # 0 "" 2 #NO_APP incl -8124(%rax) # .preempt_count movq 32(%rdi), %r12 # .counters, tcp_ptr__ #APP # 78 "lib/percpu_counter.c" 1 add %gs:this_cpu_off, %r12 # this_cpu_off, tcp_ptr__ # 0 "" 2 #NO_APP movslq (%r12),%r13 #* tcp_ptr__, tmp73 movslq %edx,%rax # batch, batch addq %rsi, %r13 # amount, count cmpq %rax, %r13 # batch, count jge .L27 #, negl %edx # tmp76 movslq %edx,%rdx # tmp76, tmp77 cmpq %rdx, %r13 # tmp77, count jg .L28 #, .L27: movq %rbx, %rdi # fbc, call _raw_spin_lock # addq %r13, 8(%rbx) # count, .count movq %rbx, %rdi # fbc, movl $0, (%r12) #,* tcp_ptr__ call _raw_spin_unlock # .L29: #APP # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1 movq %gs:kernel_stack,%rax #, pfo_ret__ # 0 "" 2 #NO_APP decl -8124(%rax) # .preempt_count movq -8136(%rax), %rax #, D.14625 testb $8, %al #, D.14625 jne .L32 #, .L31: movq -24(%rbp), %rbx #, movq -16(%rbp), %r12 #, movq -8(%rbp), %r13 #, leave ret .p2align 4,,10 .p2align 3 .L28: movl %r13d, (%r12) # count,* jmp .L29 # .L32: call preempt_schedule # .p2align 4,,6 jmp .L31 # .size __percpu_counter_add, .-__percpu_counter_add .p2align 4,,15 Signed-off-by: Nick Piggin --- fs/dcache.c | 19 +++++++++++++------ fs/inode.c | 17 ++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index f62ba90bce91..b2cb2662ca00 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -67,13 +67,22 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_dentry); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +static int get_nr_dentry(void) +{ + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); + dentry_stat.nr_dentry = get_nr_dentry(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -93,7 +102,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - percpu_counter_dec(&nr_dentry); + this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -981,7 +990,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&dcache_lock); - percpu_counter_inc(&nr_dentry); + this_cpu_inc(nr_dentry); return dentry; } @@ -2418,8 +2427,6 @@ static void __init dcache_init(void) { int loop; - percpu_counter_init(&nr_dentry, 0); - /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature diff --git a/fs/inode.c b/fs/inode.c index efc43979709f..5a0a898f55d1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -102,13 +102,17 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; -static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_inodes); static struct kmem_cache *inode_cachep __read_mostly; -static inline int get_nr_inodes(void) +static int get_nr_inodes(void) { - return percpu_counter_sum_positive(&nr_inodes); + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_inodes, i); + return sum < 0 ? 0 : sum; } static inline int get_nr_inodes_unused(void) @@ -118,9 +122,9 @@ static inline int get_nr_inodes_unused(void) int get_nr_dirty_inodes(void) { + /* not actually dirty inodes, but a wild approximation */ int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); return nr_dirty > 0 ? nr_dirty : 0; - } /* @@ -222,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif - percpu_counter_inc(&nr_inodes); + this_cpu_inc(nr_inodes); return 0; out: @@ -264,7 +268,7 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif - percpu_counter_dec(&nr_inodes); + this_cpu_dec(nr_inodes); } EXPORT_SYMBOL(__destroy_inode); @@ -1646,7 +1650,6 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); - percpu_counter_init(&nr_inodes, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) -- cgit v1.2.3 From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:23 +1100 Subject: fs: change d_delete semantics Change d_delete from a dentry deletion notification to a dentry caching advise, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine grained dentry locking of dput and dentry lru scanning much simpler. Signed-off-by: Nick Piggin --- Documentation/filesystems/porting | 8 ++++++++ Documentation/filesystems/vfs.txt | 27 +++++++++++++-------------- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/smbfs/dir.c | 4 ++-- fs/9p/vfs_dentry.c | 4 ++-- fs/afs/dir.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/coda/dir.c | 4 ++-- fs/configfs/dir.c | 2 +- fs/dcache.c | 2 -- fs/gfs2/dentry.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/libfs.c | 2 +- fs/ncpfs/dir.c | 4 ++-- fs/nfs/dir.c | 2 +- fs/proc/base.c | 2 +- fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 2 +- fs/sysfs/dir.c | 2 +- include/linux/dcache.h | 6 +++--- kernel/cgroup.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 22 files changed, 47 insertions(+), 42 deletions(-) (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index b12c89538680..9e71c9ad3108 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput( may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly free the on-disk inode, you may end up doing that while ->write_inode() is writing to it. + +--- +[mandatory] + + .d_delete() now only advises the dcache as to whether or not to cache +unreferenced dentries, and is now only called when the dentry refcount goes to +0. Even on 0 refcount transition, it must be able to tolerate being called 0, +1, or more times (eg. constant, idempotent). diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 20899e095e7e..95c0a93f056c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -847,9 +847,9 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); - int (*d_delete)(struct dentry *); + int (*d_hash)(struct dentry *, struct qstr *); + int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); @@ -864,9 +864,11 @@ struct dentry_operations { d_compare: called when a dentry should be compared with another - d_delete: called when the last reference to a dentry is - deleted. This means no-one is using the dentry, however it is - still valid and in the dcache + d_delete: called when the last reference to a dentry is dropped and the + dcache is deciding whether or not to cache it. Return 1 to delete + immediately, or 0 to cache the dentry. Default is NULL which means to + always cache a reachable dentry. d_delete must be constant and + idempotent. d_release: called when a dentry is really deallocated @@ -910,14 +912,11 @@ manipulate dentries: the usage count) dput: close a handle for a dentry (decrements the usage count). If - the usage count drops to 0, the "d_delete" method is called - and the dentry is placed on the unused list if the dentry is - still in its parents hash list. Putting the dentry on the - unused list just means that if the system needs some RAM, it - goes through the unused list of dentries and deallocates them. - If the dentry has already been unhashed and the usage count - drops to 0, in this case the dentry is deallocated after the - "d_delete" method is called + the usage count drops to 0, and the dentry is still in its + parent's hash, the "d_delete" method is called to check whether + it should be cached. If it should not be cached, or if the dentry + is not hashed, it is deleted. Otherwise cached dentries are put + into an LRU list to be reclaimed on memory shortage. d_drop: this unhashes a dentry from its parents hash list. A subsequent call to dput() will deallocate the dentry if its diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 39e534f5a3b0..d39d8a53b579 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2185,7 +2185,7 @@ static const struct file_operations pfm_file_ops = { }; static int -pfmfs_delete_dentry(struct dentry *dentry) +pfmfs_delete_dentry(const struct dentry *dentry) { return 1; } diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index f088ea2f6ac9..2270d4822c2f 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -276,7 +276,7 @@ smb_dir_open(struct inode *dir, struct file *file) static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); -static int smb_delete_dentry(struct dentry *); +static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = { @@ -367,7 +367,7 @@ out: * We use this to unhash dentries with bad inodes. */ static int -smb_delete_dentry(struct dentry * dentry) +smb_delete_dentry(const struct dentry *dentry) { if (dentry->d_inode) { if (is_bad_inode(dentry->d_inode)) { diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index cbf4e50f3933..466d2a4fc5cb 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -51,7 +51,7 @@ * */ -static int v9fs_dentry_delete(struct dentry *dentry) +static int v9fs_dentry_delete(const struct dentry *dentry) { P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, dentry); @@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) * */ -static int v9fs_cached_dentry_delete(struct dentry *dentry) +static int v9fs_cached_dentry_delete(const struct dentry *dentry) { struct inode *inode = dentry->d_inode; P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5439e1bc9a86..2c18cde27000 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); -static int afs_d_delete(struct dentry *dentry); +static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); @@ -730,7 +730,7 @@ out_bad: * - called from dput() when d_count is going to 0. * - return 1 to request dentry be unhashed, 0 otherwise */ -static int afs_d_delete(struct dentry *dentry) +static int afs_d_delete(const struct dentry *dentry) { _enter("%s", dentry->d_name.name); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 72f31ecb5c90..7ce9f8932789 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } -static int btrfs_dentry_delete(struct dentry *dentry) +static int btrfs_dentry_delete(const struct dentry *dentry) { struct btrfs_root *root; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 5d8b35539601..4cce3b07d9d7 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -47,7 +47,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir); /* dentry ops */ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); -static int coda_dentry_delete(struct dentry *); +static int coda_dentry_delete(const struct dentry *); /* support routines */ static int coda_venus_readdir(struct file *coda_file, void *buf, @@ -577,7 +577,7 @@ out: * This is the callback from dput() when d_count is going to 0. * We use this to unhash dentries with bad inodes. */ -static int coda_dentry_delete(struct dentry * dentry) +static int coda_dentry_delete(const struct dentry * dentry) { int flags; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 578706969415..20024a9ef5a7 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry, * We _must_ delete our dentries on last dput, as the chain-to-parent * behavior is required to clear the parents of default_groups. */ -static int configfs_d_delete(struct dentry *dentry) +static int configfs_d_delete(const struct dentry *dentry) { return 1; } diff --git a/fs/dcache.c b/fs/dcache.c index b2cb2662ca00..6ee6bc40cb63 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -453,8 +453,6 @@ static void prune_one_dentry(struct dentry * dentry) if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) return; - if (dentry->d_op && dentry->d_op->d_delete) - dentry->d_op->d_delete(dentry); dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry); diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 6798755b3858..e80fea2f65ff 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -106,7 +106,7 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str) return 0; } -static int gfs2_dentry_delete(struct dentry *dentry) +static int gfs2_dentry_delete(const struct dentry *dentry) { struct gfs2_inode *ginode; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2c0f148a49e6..cfe8bc7de511 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) -static int hostfs_d_delete(struct dentry *dentry) +static int hostfs_d_delete(const struct dentry *dentry) { return 1; } diff --git a/fs/libfs.c b/fs/libfs.c index a3accdf528ad..b9d25d83e228 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -37,7 +37,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf) * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(struct dentry *dentry) +static int simple_delete_dentry(const struct dentry *dentry) { return 1; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f22b12e7d337..d6e6453881ce 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -76,7 +76,7 @@ const struct inode_operations ncp_dir_inode_operations = static int ncp_lookup_validate(struct dentry *, struct nameidata *); static int ncp_hash_dentry(struct dentry *, struct qstr *); static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); -static int ncp_delete_dentry(struct dentry *); +static int ncp_delete_dentry(const struct dentry *); static const struct dentry_operations ncp_dentry_operations = { @@ -162,7 +162,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) * Closing files can be safely postponed until iput() - it's done there anyway. */ static int -ncp_delete_dentry(struct dentry * dentry) +ncp_delete_dentry(const struct dentry * dentry) { struct inode *inode = dentry->d_inode; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 996dd8989a91..9184c7c80f78 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1117,7 +1117,7 @@ out_error: /* * This is called from dput() when d_count is going to 0. */ -static int nfs_dentry_delete(struct dentry *dentry) +static int nfs_dentry_delete(const struct dentry *dentry) { dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", dentry->d_parent->d_name.name, dentry->d_name.name, diff --git a/fs/proc/base.c b/fs/proc/base.c index 182845147fe4..d932fdb6a245 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1744,7 +1744,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static int pid_delete_dentry(struct dentry * dentry) +static int pid_delete_dentry(const struct dentry * dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index dd29f0337661..1d607be36d95 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = { * smarter: we could keep a "volatile" flag in the * inode to indicate which ones to keep. */ -static int proc_delete_dentry(struct dentry * dentry) +static int proc_delete_dentry(const struct dentry * dentry) { return 1; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b652cb00906b..a256d770ea18 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -392,7 +392,7 @@ static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) return !PROC_I(dentry->d_inode)->sysctl->unregistering; } -static int proc_sys_delete(struct dentry *dentry) +static int proc_sys_delete(const struct dentry *dentry) { return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 7e54bac8c4b0..27e1102e303e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd) goto repeat; } -static int sysfs_dentry_delete(struct dentry *dentry) +static int sysfs_dentry_delete(const struct dentry *dentry) { struct sysfs_dirent *sd = dentry->d_fsdata; return !!(sd->s_flags & SYSFS_FLAG_REMOVED); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index fff975576b5b..cbfc9567e4e9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -133,9 +133,9 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); - int (*d_delete)(struct dentry *); + int (*d_hash)(struct dentry *, struct qstr *); + int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 163c890f436d..746055b214d7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2198,7 +2198,7 @@ static inline struct cftype *__file_cft(struct file *file) return __d_cft(file->f_dentry); } -static int cgroup_delete_dentry(struct dentry *dentry) +static int cgroup_delete_dentry(const struct dentry *dentry) { return 1; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 10a17a37ec4e..a0dc1a86fcea 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -430,7 +430,7 @@ void rpc_put_mount(void) } EXPORT_SYMBOL_GPL(rpc_put_mount); -static int rpc_delete_dentry(struct dentry *dentry) +static int rpc_delete_dentry(const struct dentry *dentry) { return 1; } -- cgit v1.2.3 From fb2d5b86aff355a27ebfc132d3c99f4a940cc3fe Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:26 +1100 Subject: fs: name case update method smpfs and ncpfs want to update a live dentry name in-place. Rather than have them open code the locking, provide a documented dcache API. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 ++-- fs/dcache.c | 27 +++++++++++++++++++++++++++ fs/ncpfs/dir.c | 35 ++++++----------------------------- include/linux/dcache.h | 2 ++ 4 files changed, 37 insertions(+), 31 deletions(-) (limited to 'fs/dcache.c') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbb98658148b..dbd2e1df3ba9 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -145,8 +145,8 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, goto end_advance; } else { hashed = 1; - memcpy((char *) newdent->d_name.name, qname->name, - newdent->d_name.len); + /* dir i_mutex is locked because we're in readdir */ + dentry_update_name_case(newdent, qname); } if (!newdent->d_inode) { diff --git a/fs/dcache.c b/fs/dcache.c index 6ee6bc40cb63..814e5f491e9c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1589,6 +1589,33 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * dentry_update_name_case - update case insensitive dentry with a new name + * @dentry: dentry to be updated + * @name: new name + * + * Update a case insensitive dentry with new case of name. + * + * dentry must have been returned by d_lookup with name @name. Old and new + * name lengths must match (ie. no d_compare which allows mismatched name + * lengths). + * + * Parent inode i_mutex must be held over d_lookup and into this call (to + * keep renames and concurrent inserts, and readdir(2) away). + */ +void dentry_update_name_case(struct dentry *dentry, struct qstr *name) +{ + BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ + + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} +EXPORT_SYMBOL(dentry_update_name_case); + /* * When switching names, the actual string doesn't strictly have to * be preserved in the target - because we're dropping the target diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index d6e6453881ce..e80ea4e37c48 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -611,35 +611,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, shrink_dcache_parent(newdent); /* - * It is not as dangerous as it looks. NetWare's OS2 namespace is - * case preserving yet case insensitive. So we update dentry's name - * as received from server. We found dentry via d_lookup with our - * hash, so we know that hash does not change, and so replacing name - * should be reasonably safe. + * NetWare's OS2 namespace is case preserving yet case + * insensitive. So we update dentry's name as received from + * server. Parent dir's i_mutex is locked because we're in + * readdir. */ - if (qname.len == newdent->d_name.len && - memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) { - struct inode *inode = newdent->d_inode; - - /* - * Inside ncpfs all uses of d_name are either for debugging, - * or on functions which acquire inode mutex (mknod, creat, - * lookup). So grab i_mutex here, to be sure. d_path - * uses dcache_lock when generating path, so we should too. - * And finally d_compare is protected by dentry's d_lock, so - * here we go. - */ - if (inode) - mutex_lock(&inode->i_mutex); - spin_lock(&dcache_lock); - spin_lock(&newdent->d_lock); - memcpy((char *) newdent->d_name.name, qname.name, - newdent->d_name.len); - spin_unlock(&newdent->d_lock); - spin_unlock(&dcache_lock); - if (inode) - mutex_unlock(&inode->i_mutex); - } + dentry_update_name_case(newdent, &qname); } if (!newdent->d_inode) { @@ -657,7 +634,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, } else { struct inode *inode = newdent->d_inode; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ncp_update_inode2(inode, entry); mutex_unlock(&inode->i_mutex); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cbfc9567e4e9..6cdf4995c90a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -290,6 +290,8 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in return res; } +extern void dentry_update_name_case(struct dentry *, struct qstr *); + /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); -- cgit v1.2.3 From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:27 +1100 Subject: fs: change d_compare for rcu-walk Change d_compare so it may be called from lock-free RCU lookups. This does put significant restrictions on what may be done from the callback, however there don't seem to have been any problems with in-tree fses. If some strange use case pops up that _really_ cannot cope with the rcu-walk rules, we can just add new rcu-unaware callbacks, which would cause name lookup to drop out of rcu-walk mode. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 4 +- Documentation/filesystems/porting | 7 +++ Documentation/filesystems/vfs.txt | 26 +++++++++-- drivers/staging/smbfs/dir.c | 16 ++++--- fs/adfs/dir.c | 8 ++-- fs/affs/namei.c | 46 ++++++++++++-------- fs/cifs/dir.c | 12 ++--- fs/dcache.c | 4 +- fs/fat/namei_msdos.c | 14 +++--- fs/fat/namei_vfat.c | 33 ++++++++------ fs/hfs/hfs_fs.h | 5 ++- fs/hfs/string.c | 14 +++--- fs/hfsplus/hfsplus_fs.h | 5 ++- fs/hfsplus/unicode.c | 15 ++++--- fs/hpfs/dentry.c | 22 ++++++---- fs/isofs/inode.c | 92 ++++++++++++++++++++------------------- fs/isofs/namei.c | 3 +- fs/jfs/namei.c | 11 +++-- fs/ncpfs/dir.c | 25 ++++++----- fs/ncpfs/ncplib_kernel.h | 8 ++-- fs/proc/proc_sysctl.c | 13 +++--- include/linux/dcache.h | 12 +++-- include/linux/ncp_fs.h | 4 +- 23 files changed, 242 insertions(+), 157 deletions(-) (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33fa3e5d38fd..9a76f8d8bf95 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -11,7 +11,9 @@ be able to use diff(1). prototypes: int (*d_revalidate)(struct dentry *, int); int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 9e71c9ad3108..d44511e20828 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -326,3 +326,10 @@ to it. unreferenced dentries, and is now only called when the dentry refcount goes to 0. Even on 0 refcount transition, it must be able to tolerate being called 0, 1, or more times (eg. constant, idempotent). + +--- +[mandatory] + + .d_compare() calling convention and locking rules are significantly +changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +look at examples of other filesystems) for guidance. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 95c0a93f056c..250681b8c7cc 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -848,7 +848,9 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash)(struct dentry *, struct qstr *); - int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); @@ -860,9 +862,27 @@ struct dentry_operations { dcache. Most filesystems leave this as NULL, because all their dentries in the dcache are valid - d_hash: called when the VFS adds a dentry to the hash table + d_hash: called when the VFS adds a dentry to the hash table. The first + dentry passed to d_hash is the parent directory that the name is + to be hashed into. - d_compare: called when a dentry should be compared with another + d_compare: called to compare a dentry name with a given name. The first + dentry is the parent of the dentry to be compared, the second is + the parent's inode, then the dentry and inode (may be NULL) of the + child dentry. len and name string are properties of the dentry to be + compared. qstr is the name to compare it with. + + Must be constant and idempotent, and should not take locks if + possible, and should not or store into the dentry or inodes. + Should not dereference pointers outside the dentry or inodes without + lots of care (eg. d_parent, d_inode, d_name should not be used). + + However, our vfsmount is pinned, and RCU held, so the dentries and + inodes won't disappear, neither will our sb or filesystem module. + ->i_sb and ->d_sb may be used. + + It is a tricky calling convention because it needs to be called under + "rcu-walk", ie. without any locks or references on things. d_delete: called when the last reference to a dentry is dropped and the dcache is deciding whether or not to cache it. Return 1 to delete diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 2270d4822c2f..bd63229f43f2 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -275,7 +275,10 @@ smb_dir_open(struct inode *dir, struct file *file) */ static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); -static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int smb_compare_dentry(const struct dentry *, + const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = @@ -347,14 +350,17 @@ smb_hash_dentry(struct dentry *dir, struct qstr *this) } static int -smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b) +smb_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index f4287e4de744..c8ed66162bd4 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -237,17 +237,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr) * requirements of the underlying filesystem. */ static int -adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name) +adfs_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i; - if (entry->len != name->len) + if (len != name->len) return 1; for (i = 0; i < name->len; i++) { char a, b; - a = entry->name[i]; + a = str[i]; b = name->name[i]; if (a >= 'A' && a <= 'Z') diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 914d1c0bc07a..547d5deb0d42 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -14,10 +14,16 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); static int affs_hash_dentry(struct dentry *, struct qstr *); -static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); static int affs_intl_hash_dentry(struct dentry *, struct qstr *); -static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_intl_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); const struct dentry_operations affs_dentry_operations = { .d_hash = affs_hash_dentry, @@ -88,29 +94,29 @@ affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); } -static inline int -__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper) +static inline int __affs_compare_dentry(unsigned int len, + const char *str, const struct qstr *name, toupper_t toupper) { - const u8 *aname = a->name; - const u8 *bname = b->name; - int len; + const u8 *aname = str; + const u8 *bname = name->name; - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + /* + * 'str' is the name of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (affs_check_name(b->name,b->len)) + if (affs_check_name(name->name, name->len)) return 1; - /* If the names are longer than the allowed 30 chars, + /* + * If the names are longer than the allowed 30 chars, * the excess is ignored, so their length may differ. */ - len = a->len; if (len >= 30) { - if (b->len < 30) + if (name->len < 30) return 1; len = 30; - } else if (len != b->len) + } else if (len != name->len) return 1; for (; len > 0; len--) @@ -121,14 +127,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou } static int -affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_toupper); + return __affs_compare_dentry(len, str, name, affs_toupper); } static int -affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_intl_toupper); + return __affs_compare_dentry(len, str, name, affs_intl_toupper); } /* diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 521d841b1fd1..c60133f0d8e4 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -715,13 +715,15 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) return 0; } -static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, - struct qstr *b) +static int cifs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; - if ((a->len == b->len) && - (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) + if ((name->len == len) && + (nls_strnicmp(codepage, name->name, str, len) == 0)) return 0; return 1; } diff --git a/fs/dcache.c b/fs/dcache.c index 814e5f491e9c..7075555fbb04 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1437,7 +1437,9 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) */ qstr = &dentry->d_name; if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) + if (parent->d_op->d_compare(parent, parent->d_inode, + dentry, dentry->d_inode, + qstr->len, qstr->name, name)) goto next; } else { if (qstr->len != len) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3345aabd1dd7..99d3c7ac973c 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -164,16 +164,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr) * Compare two msdos names. If either of the names are invalid, * we fall back to doing the standard name comparison. */ -static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; + struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; int error; - error = msdos_format_name(a->name, a->len, a_msdos_name, options); + error = msdos_format_name(name->name, name->len, a_msdos_name, options); if (error) goto old_compare; - error = msdos_format_name(b->name, b->len, b_msdos_name, options); + error = msdos_format_name(str, len, b_msdos_name, options); if (error) goto old_compare; error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); @@ -182,8 +184,8 @@ out: old_compare: error = 1; - if (a->len == b->len) - error = memcmp(a->name, b->name, a->len); + if (name->len == len) + error = memcmp(name->name, str, len); goto out; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b936703b8924..95e00ab84c3f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -85,15 +85,18 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) } /* returns the length of a struct qstr, ignoring trailing dots */ -static unsigned int vfat_striptail_len(struct qstr *qstr) +static unsigned int __vfat_striptail_len(unsigned int len, const char *name) { - unsigned int len = qstr->len; - - while (len && qstr->name[len - 1] == '.') + while (len && name[len - 1] == '.') len--; return len; } +static unsigned int vfat_striptail_len(const struct qstr *qstr) +{ + return __vfat_striptail_len(qstr->len, qstr->name); +} + /* * Compute the hash for the vfat name corresponding to the dentry. * Note: if the name is invalid, we leave the hash code unchanged so @@ -133,16 +136,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) /* * Case insensitive compare of two vfat names. */ -static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (nls_strnicmp(t, a->name, b->name, alen) == 0) + if (nls_strnicmp(t, name->name, str, alen) == 0) return 0; } return 1; @@ -151,15 +156,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) /* * Case sensitive compare of two vfat names. */ -static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) + if (strncmp(name->name, str, alen) == 0) return 0; } return 1; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index c8cffb81e849..8cd876f0e961 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -216,7 +216,10 @@ extern const struct dentry_operations hfs_dentry_operations; extern int hfs_hash_dentry(struct dentry *, struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); -extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +extern int hfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* trans.c */ extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 927a5af79428..aaf90d0d6940 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -92,21 +92,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, * Test for equality of two strings in the HFS filename character ordering. * return 1 on failure and 0 on success */ -int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { const unsigned char *n1, *n2; - int len; - len = s1->len; if (len >= HFS_NAMELEN) { - if (s2->len < HFS_NAMELEN) + if (name->len < HFS_NAMELEN) return 1; len = HFS_NAMELEN; - } else if (len != s2->len) + } else if (len != name->len) return 1; - n1 = s1->name; - n2 = s2->name; + n1 = str; + n2 = name->name; while (len--) { if (caseorder[*n1++] != caseorder[*n2++]) return 1; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index cb3653efb57a..7aa96eefe483 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -380,7 +380,10 @@ int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *) int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index b66d67de882c..b178c997efa8 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -363,9 +363,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct super_block *sb = dentry->d_sb; + struct super_block *sb = parent->d_sb; int casefold, decompose, size; int dsize1, dsize2, len1, len2; const u16 *dstr1, *dstr2; @@ -375,10 +378,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); - astr1 = s1->name; - len1 = s1->len; - astr2 = s2->name; - len2 = s2->len; + astr1 = str; + len1 = len; + astr2 = name->name; + len2 = name->len; dsize1 = dsize2 = 0; dstr1 = dstr2 = NULL; diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 67d9d36b3d5f..dd9b1e74a734 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -34,19 +34,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) return 0; } -static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int hpfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - unsigned al=a->len; - unsigned bl=b->len; - hpfs_adjust_length(a->name, &al); + unsigned al = len; + unsigned bl = name->len; + + hpfs_adjust_length(str, &al); /*hpfs_adjust_length(b->name, &bl);*/ - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + + /* + * 'str' is the nane of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (hpfs_chk_name(b->name, &bl)) + if (hpfs_chk_name(name->name, &bl)) return 1; - if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) + if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0)) return 1; return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bfdeb82a53be..7b0fbc61af81 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -28,14 +28,26 @@ static int isofs_hashi(struct dentry *parent, struct qstr *qstr); static int isofs_hash(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_dentry_cmpi(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_dentry_cmpi_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #endif static void isofs_put_super(struct super_block *sb) @@ -206,49 +218,31 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) } /* - * Case insensitive compare of two isofs names. + * Compare of two isofs names. */ -static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) +static int isofs_dentry_cmp_common( + unsigned int len, const char *str, + const struct qstr *name, int ms, int ci) { int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; + alen = name->len; + blen = len; if (ms) { - while (alen && a->name[alen-1] == '.') + while (alen && name->name[alen-1] == '.') alen--; - while (blen && b->name[blen-1] == '.') + while (blen && str[blen-1] == '.') blen--; } if (alen == blen) { - if (strnicmp(a->name, b->name, alen) == 0) - return 0; - } - return 1; -} - -/* - * Case sensitive compare of two isofs names. - */ -static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) -{ - int alen, blen; - - /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; - if (ms) { - while (alen && a->name[alen-1] == '.') - alen--; - while (blen && b->name[blen-1] == '.') - blen--; - } - if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) - return 0; + if (ci) { + if (strnicmp(name->name, str, alen) == 0) + return 0; + } else { + if (strncmp(name->name, str, alen) == 0) + return 0; + } } return 1; } @@ -266,15 +260,19 @@ isofs_hashi(struct dentry *dentry, struct qstr *qstr) } static int -isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 0); } static int -isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 1); } #ifdef CONFIG_JOLIET @@ -291,15 +289,19 @@ isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) } static int -isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 0); } static int -isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 1); } #endif diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 0d23abfd4280..715f7d318046 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) qstr.name = compare; qstr.len = dlen; - return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); + return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, + dentry->d_name.len, dentry->d_name.name, &qstr); } /* diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 2da1546161fa..92129016cd79 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1587,14 +1587,17 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this) return 0; } -static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) +static int jfs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e80ea4e37c48..3bcc68aed416 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -75,7 +75,9 @@ const struct inode_operations ncp_dir_inode_operations = */ static int ncp_lookup_validate(struct dentry *, struct nameidata *); static int ncp_hash_dentry(struct dentry *, struct qstr *); -static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); +static int ncp_compare_dentry(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); static int ncp_delete_dentry(const struct dentry *); static const struct dentry_operations ncp_dentry_operations = @@ -113,10 +115,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) #define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) -static inline int ncp_case_sensitive(struct dentry *dentry) +static inline int ncp_case_sensitive(const struct inode *i) { #ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(dentry->d_inode) == NW_NS_NFS; + return ncp_namespace(i) == NW_NS_NFS; #else return 0; #endif /* CONFIG_NCPFS_NFS_NS */ @@ -129,12 +131,13 @@ static inline int ncp_case_sensitive(struct dentry *dentry) static int ncp_hash_dentry(struct dentry *dentry, struct qstr *this) { - if (!ncp_case_sensitive(dentry)) { + if (!ncp_case_sensitive(dentry->d_inode)) { + struct super_block *sb = dentry->d_sb; struct nls_table *t; unsigned long hash; int i; - t = NCP_IO_TABLE(dentry); + t = NCP_IO_TABLE(sb); hash = init_name_hash(); for (i=0; ilen ; i++) hash = partial_name_hash(ncp_tolower(t, this->name[i]), @@ -145,15 +148,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this) } static int -ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - if (a->len != b->len) + if (len != name->len) return 1; - if (ncp_case_sensitive(dentry)) - return strncmp(a->name, b->name, a->len); + if (ncp_case_sensitive(pinode)) + return strncmp(str, name->name, len); - return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); + return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len); } /* diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 3c57eca634ce..244d1b73fda7 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); #define NCP_ESC ':' -#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) +#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io) #define ncp_tolower(t, c) nls_tolower(t, c) #define ncp_toupper(t, c) nls_toupper(t, c) #define ncp_strnicmp(t, s1, s2, len) \ @@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *, int ncp__vol2io(unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); -#define NCP_IO_TABLE(dentry) NULL +#define NCP_IO_TABLE(sb) NULL #define ncp_tolower(t, c) tolower(c) #define ncp_toupper(t, c) toupper(c) #define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) #define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) -static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, - const unsigned char *s2, int len) +static inline int ncp_strnicmp(const struct nls_table *t, + const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (tolower(*s1++) != tolower(*s2++)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a256d770ea18..ae4b0fd9033f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -397,15 +397,16 @@ static int proc_sys_delete(const struct dentry *dentry) return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } -static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, - struct qstr *name) +static int proc_sys_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct dentry *dentry = container_of(qstr, struct dentry, d_name); - if (qstr->len != name->len) + if (name->len != len) return 1; - if (memcmp(qstr->name, name->name, name->len)) + if (memcmp(name->name, str, len)) return 1; - return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); + return !sysctl_is_seen(PROC_I(inode)->sysctl); } static const struct dentry_operations proc_sys_dentry_operations = { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6cdf4995c90a..75a072bf2a34 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -134,7 +134,9 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash)(struct dentry *, struct qstr *); - int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); @@ -145,12 +147,8 @@ struct dentry_operations { * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/Locking. Keep it updated! * - * the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining - * equivalency classes. Using the dentry itself might not work, as it - * might be a negative dentry which has no information associated with - * it. + * FUrther descriptions are found in Documentation/filesystems/vfs.txt. + * Keep it updated too! */ /* d_flags entries */ diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index ef663061d5ac..1c27f201c856 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -184,13 +184,13 @@ struct ncp_entry_info { __u8 file_handle[6]; }; -static inline struct ncp_server *NCP_SBP(struct super_block *sb) +static inline struct ncp_server *NCP_SBP(const struct super_block *sb) { return sb->s_fs_info; } #define NCP_SERVER(inode) NCP_SBP((inode)->i_sb) -static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode) +static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode) { return container_of(inode, struct ncp_inode_info, vfs_inode); } -- cgit v1.2.3 From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:28 +1100 Subject: fs: change d_hash for rcu-walk Change d_hash so it may be called from lock-free RCU lookups. See similar patch for d_compare for details. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 5 +++-- Documentation/filesystems/porting | 7 +++++++ Documentation/filesystems/vfs.txt | 8 ++++++-- drivers/staging/smbfs/cache.c | 2 +- drivers/staging/smbfs/dir.c | 6 ++++-- fs/adfs/dir.c | 3 ++- fs/affs/namei.c | 20 ++++++++++++-------- fs/cifs/dir.c | 5 +++-- fs/cifs/readdir.c | 2 +- fs/dcache.c | 2 +- fs/ecryptfs/inode.c | 4 ++-- fs/fat/namei_msdos.c | 3 ++- fs/fat/namei_vfat.c | 8 +++++--- fs/gfs2/dentry.c | 3 ++- fs/hfs/hfs_fs.h | 3 ++- fs/hfs/string.c | 3 ++- fs/hfsplus/hfsplus_fs.h | 3 ++- fs/hfsplus/unicode.c | 3 ++- fs/hpfs/dentry.c | 3 ++- fs/isofs/inode.c | 28 ++++++++++++++++++---------- fs/jfs/namei.c | 3 ++- fs/namei.c | 5 +++-- fs/ncpfs/dir.c | 10 ++++++---- fs/sysv/namei.c | 3 ++- include/linux/dcache.h | 3 ++- 25 files changed, 94 insertions(+), 51 deletions(-) (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9a76f8d8bf95..a15ee207b449 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -10,7 +10,8 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: int (*d_revalidate)(struct dentry *, int); - int (*d_hash) (struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -22,7 +23,7 @@ prototypes: locking rules: dcache_lock rename_lock ->d_lock may block d_revalidate: no no no yes -d_hash no no no yes +d_hash no no no no d_compare: no yes no no d_delete: yes no yes no d_release: no no no yes diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index d44511e20828..9fd31940a8ef 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -333,3 +333,10 @@ unreferenced dentries, and is now only called when the dentry refcount goes to .d_compare() calling convention and locking rules are significantly changed. Read updated documentation in Documentation/filesystems/vfs.txt (and look at examples of other filesystems) for guidance. + +--- +[mandatory] + + .d_hash() calling convention and locking rules are significantly +changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +look at examples of other filesystems) for guidance. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 250681b8c7cc..69b10ff5ec81 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -847,7 +847,8 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash)(struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -864,7 +865,10 @@ struct dentry_operations { d_hash: called when the VFS adds a dentry to the hash table. The first dentry passed to d_hash is the parent directory that the name is - to be hashed into. + to be hashed into. The inode is the dentry's inode. + + Same locking and synchronisation rules as d_compare regarding + what is safe to dereference etc. d_compare: called to compare a dentry name with a given name. The first dentry is the parent of the dentry to be compared, the second is diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbd2e1df3ba9..0beded260b00 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -134,7 +134,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname->hash = full_name_hash(qname->name, qname->len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, qname) != 0) + if (dentry->d_op->d_hash(dentry, inode, qname) != 0) goto end_advance; newdent = d_lookup(dentry, qname); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index bd63229f43f2..5f79799d5d4a 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -274,7 +274,8 @@ smb_dir_open(struct inode *dir, struct file *file) * Dentry operations routines */ static int smb_lookup_validate(struct dentry *, struct nameidata *); -static int smb_hash_dentry(struct dentry *, struct qstr *); +static int smb_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); static int smb_compare_dentry(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, @@ -336,7 +337,8 @@ smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) } static int -smb_hash_dentry(struct dentry *dir, struct qstr *this) +smb_hash_dentry(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index c8ed66162bd4..a11e5e102716 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = { }; static int -adfs_hash(struct dentry *parent, struct qstr *qstr) +adfs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr) { const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; const unsigned char *name; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 547d5deb0d42..5aca08c21100 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -13,13 +13,15 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); -static int affs_hash_dentry(struct dentry *, struct qstr *); +static int affs_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); static int affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(struct dentry *, struct qstr *); +static int affs_intl_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); static int affs_intl_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -64,13 +66,13 @@ affs_get_toupper(struct super_block *sb) * Note: the dentry argument is the parent dentry. */ static inline int -__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) +__affs_hash_dentry(struct qstr *qstr, toupper_t toupper) { const u8 *name = qstr->name; unsigned long hash; int i; - i = affs_check_name(qstr->name,qstr->len); + i = affs_check_name(qstr->name, qstr->len); if (i) return i; @@ -84,14 +86,16 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) } static int -affs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_toupper); + return __affs_hash_dentry(qstr, affs_toupper); } static int -affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); + return __affs_hash_dentry(qstr, affs_intl_toupper); } static inline int __affs_compare_dentry(unsigned int len, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c60133f0d8e4..88bfe686ac00 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -700,9 +700,10 @@ const struct dentry_operations cifs_dentry_ops = { /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; -static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) +static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *q) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; int i; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a73eb9f4bdaf..ee463aeca0b0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, cFYI(1, "For %s", name->name); if (parent->d_op && parent->d_op->d_hash) - parent->d_op->d_hash(parent, name); + parent->d_op->d_hash(parent, parent->d_inode, name); else name->hash = full_name_hash(name->name, name->len); diff --git a/fs/dcache.c b/fs/dcache.c index 7075555fbb04..6f59f375ed9d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1478,7 +1478,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) */ name->hash = full_name_hash(name->name, name->len); if (dir->d_op && dir->d_op->d_hash) { - if (dir->d_op->d_hash(dir, name) < 0) + if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } dentry = d_lookup(dir, name); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 9d1a22d62765..a1ed7a7cb173 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = ecryptfs_dentry->d_name.hash; if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } @@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = full_name_hash(lower_name.name, lower_name.len); if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 99d3c7ac973c..3b3e072d8982 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, * that the existing dentry can be used. The msdos fs routines will * return ENOENT or EINVAL as appropriate. */ -static int msdos_hash(struct dentry *dentry, struct qstr *qstr) +static int msdos_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; unsigned char msdos_name[MSDOS_NAME]; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 95e00ab84c3f..4fc06278db48 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -103,7 +103,8 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hash(struct dentry *dentry, struct qstr *qstr) +static int vfat_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); return 0; @@ -115,9 +116,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) +static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; const unsigned char *name; unsigned int len; unsigned long hash; diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index e80fea2f65ff..50497f65763b 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -100,7 +100,8 @@ fail: return 0; } -static int gfs2_dhash(struct dentry *dentry, struct qstr *str) +static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { str->hash = gfs2_disk_hash(str->name, str->len); return 0; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 8cd876f0e961..ad97c2d58287 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -213,7 +213,8 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); /* string.c */ extern const struct dentry_operations hfs_dentry_operations; -extern int hfs_hash_dentry(struct dentry *, struct qstr *); +extern int hfs_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); extern int hfs_compare_dentry(const struct dentry *parent, diff --git a/fs/hfs/string.c b/fs/hfs/string.c index aaf90d0d6940..495a976a3cc9 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -51,7 +51,8 @@ static unsigned char caseorder[256] = { /* * Hash a string to an integer in a case-independent way */ -int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) +int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { const unsigned char *name = this->name; unsigned int hash, len = this->len; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 7aa96eefe483..a5308f491e3e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -379,7 +379,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); +int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *str); int hfsplus_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index b178c997efa8..d800aa0f2c80 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -320,7 +320,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) +int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { struct super_block *sb = dentry->d_sb; const char *astr; diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index dd9b1e74a734..35526df1fd32 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -12,7 +12,8 @@ * Note: the dentry argument is the parent dentry. */ -static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { unsigned long hash; int i; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 7b0fbc61af81..d204ee4235fd 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -26,8 +26,10 @@ #define BEQUIET -static int isofs_hashi(struct dentry *parent, struct qstr *qstr); -static int isofs_hash(struct dentry *parent, struct qstr *qstr); +static int isofs_hashi(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); static int isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -38,8 +40,10 @@ static int isofs_dentry_cmp(const struct dentry *parent, unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET -static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); +static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); static int isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -172,7 +176,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -193,7 +197,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -248,13 +252,15 @@ static int isofs_dentry_cmp_common( } static int -isofs_hash(struct dentry *dentry, struct qstr *qstr) +isofs_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 0); } static int -isofs_hashi(struct dentry *dentry, struct qstr *qstr) +isofs_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 0); } @@ -277,13 +283,15 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, #ifdef CONFIG_JOLIET static int -isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 1); } static int -isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 1); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 92129016cd79..57f90dad8919 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1574,7 +1574,8 @@ const struct file_operations jfs_dir_operations = { .llseek = generic_file_llseek, }; -static int jfs_ci_hash(struct dentry *dir, struct qstr *this) +static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; diff --git a/fs/namei.c b/fs/namei.c index 4ff7ca530533..f3b5ca404659 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -731,7 +731,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * to use its own hash.. */ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { - int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); + int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, + nd->path.dentry->d_inode, name); if (err < 0) return err; } @@ -1134,7 +1135,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * to use its own hash.. */ if (base->d_op && base->d_op->d_hash) { - err = base->d_op->d_hash(base, name); + err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) goto out; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 3bcc68aed416..bbbf7922f422 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -74,7 +74,8 @@ const struct inode_operations ncp_dir_inode_operations = * Dentry operations routines */ static int ncp_lookup_validate(struct dentry *, struct nameidata *); -static int ncp_hash_dentry(struct dentry *, struct qstr *); +static int ncp_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -129,9 +130,10 @@ static inline int ncp_case_sensitive(const struct inode *i) * is case-sensitive. */ static int -ncp_hash_dentry(struct dentry *dentry, struct qstr *this) +ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { - if (!ncp_case_sensitive(dentry->d_inode)) { + if (!ncp_case_sensitive(inode)) { struct super_block *sb = dentry->d_sb; struct nls_table *t; unsigned long hash; @@ -597,7 +599,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname.hash = full_name_hash(qname.name, qname.len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, &qname) != 0) + if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0) goto end_advance; newdent = d_lookup(dentry, &qname); diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 11e7f7d11cd0..7507aeb4c90e 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int sysv_hash(struct dentry *dentry, struct qstr *qstr) +static int sysv_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { /* Truncate the name in place, avoids having to define a compare function. */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 75a072bf2a34..1149e706f04d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -133,7 +133,8 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash)(struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); -- cgit v1.2.3 From ec2447c278ee973d35f38e53ca16ba7f965ae33d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:29 +1100 Subject: hostfs: simplify locking Remove dcache_lock locking from hostfs filesystem, and move it into dcache helpers. All that is required is a coherent path name. Protection from concurrent modification of the namespace after path name generation is not provided in current code, because dcache_lock is dropped before the path is used. Signed-off-by: Nick Piggin --- fs/dcache.c | 15 +++++++++++++-- fs/hostfs/hostfs_kern.c | 24 ++++++++++-------------- include/linux/dcache.h | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 6f59f375ed9d..61beb40dd6bf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2171,7 +2171,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, /* * Write full pathname from the root of the filesystem into the buffer. */ -char *__dentry_path(struct dentry *dentry, char *buf, int buflen) +static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { char *end = buf + buflen; char *retval; @@ -2198,7 +2198,18 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen) Elong: return ERR_PTR(-ENAMETOOLONG); } -EXPORT_SYMBOL(__dentry_path); + +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + char *retval; + + spin_lock(&dcache_lock); + retval = __dentry_path(dentry, buf, buflen); + spin_unlock(&dcache_lock); + + return retval; +} +EXPORT_SYMBOL(dentry_path_raw); char *dentry_path(struct dentry *dentry, char *buf, int buflen) { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index cfe8bc7de511..39dc505ed273 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args, static char *__dentry_name(struct dentry *dentry, char *name) { - char *p = __dentry_path(dentry, name, PATH_MAX); + char *p = dentry_path_raw(dentry, name, PATH_MAX); char *root; size_t len; - spin_unlock(&dcache_lock); - root = dentry->d_sb->s_fs_info; len = strlen(root); if (IS_ERR(p)) { @@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry) if (!name) return NULL; - spin_lock(&dcache_lock); return __dentry_name(dentry, name); /* will unlock */ } static char *inode_name(struct inode *ino) { struct dentry *dentry; - char *name = __getname(); - if (!name) - return NULL; + char *name; - spin_lock(&dcache_lock); - if (list_empty(&ino->i_dentry)) { - spin_unlock(&dcache_lock); - __putname(name); + dentry = d_find_alias(ino); + if (!dentry) return NULL; - } - dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); - return __dentry_name(dentry, name); /* will unlock */ + + name = dentry_name(dentry); + + dput(dentry); + + return name; } static char *follow_link(char *link) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1149e706f04d..cea27dfca532 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -311,7 +311,7 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); -extern char *__dentry_path(struct dentry *, char *, int); +extern char *dentry_path_raw(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ -- cgit v1.2.3 From 789680d1ee9311cdf095241dc02bd9784d799cd1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:30 +1100 Subject: fs: dcache scale hash Add a new lock, dcache_hash_lock, to protect the dcache hash table from concurrent modification. d_hash is also protected by d_lock. Signed-off-by: Nick Piggin --- fs/dcache.c | 73 ++++++++++++++++++++++++++++++++++++++++++-------- include/linux/dcache.h | 35 ++---------------------- 2 files changed, 64 insertions(+), 44 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 61beb40dd6bf..1e124d4673c6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,10 +35,24 @@ #include #include "internal.h" +/* + * Usage: + * dcache_hash_lock protects dcache hash table, s_anon lists + * + * Ordering: + * dcache_lock + * dentry->d_lock + * dcache_hash_lock + * + * if (dentry1 < dentry2) + * dentry1->d_lock + * dentry2->d_lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); @@ -196,6 +210,42 @@ static struct dentry *d_kill(struct dentry *dentry) return parent; } +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock. + */ +void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + dentry->d_flags |= DCACHE_UNHASHED; + spin_lock(&dcache_hash_lock); + hlist_del_rcu(&dentry->d_hash); + spin_unlock(&dcache_hash_lock); + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} +EXPORT_SYMBOL(d_drop); + /* * This is dput * @@ -1199,7 +1249,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_flags |= DCACHE_DISCONNECTED; tmp->d_flags &= ~DCACHE_UNHASHED; list_add(&tmp->d_alias, &inode->i_dentry); + spin_lock(&dcache_hash_lock); hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + spin_unlock(&dcache_hash_lock); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); @@ -1585,7 +1637,9 @@ void d_rehash(struct dentry * entry) { spin_lock(&dcache_lock); spin_lock(&entry->d_lock); + spin_lock(&dcache_hash_lock); _d_rehash(entry); + spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1692,8 +1746,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ static void d_move_locked(struct dentry * dentry, struct dentry * target) { - struct hlist_head *list; - if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -1710,14 +1762,11 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) } /* Move the dentry to the target hash queue, if on different bucket */ - if (d_unhashed(dentry)) - goto already_unhashed; - - hlist_del_rcu(&dentry->d_hash); - -already_unhashed: - list = d_hash(target->d_parent, target->d_name.hash); - __d_rehash(dentry, list); + spin_lock(&dcache_hash_lock); + if (!d_unhashed(dentry)) + hlist_del_rcu(&dentry->d_hash); + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); + spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); @@ -1914,7 +1963,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) found_lock: spin_lock(&actual->d_lock); found: + spin_lock(&dcache_hash_lock); _d_rehash(actual); + spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_lock); out_nolock: diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cea27dfca532..2feb624b67f1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -184,39 +184,6 @@ struct dentry_operations { extern spinlock_t dcache_lock; extern seqlock_t rename_lock; -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent dentry hashes, so that it won't - * be found through a VFS lookup any more. Note that this is different from - * deleting the dentry - d_delete will try to mark the dentry negative if - * possible, giving a successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants to invalidate a dentry for some - * reason (NFS timeouts or autofs deletes). - * - * __d_drop requires dentry->d_lock. - */ - -static inline void __d_drop(struct dentry *dentry) -{ - if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - hlist_del_rcu(&dentry->d_hash); - } -} - -static inline void d_drop(struct dentry *dentry) -{ - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); -} - static inline int dname_external(struct dentry *dentry) { return dentry->d_name.name != dentry->d_iname; @@ -228,6 +195,8 @@ static inline int dname_external(struct dentry *dentry) extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); +extern void __d_drop(struct dentry *dentry); +extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); /* allocate/de-allocate */ -- cgit v1.2.3 From 2304450783dfde7b0b94ae234edd0dbffa865073 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:31 +1100 Subject: fs: dcache scale lru Add a new lock, dcache_lru_lock, to protect the dcache LRU list from concurrent modification. d_lru is also protected by d_lock, which allows LRU lists to be accessed without the lru lock, using RCU in future patches. Signed-off-by: Nick Piggin --- fs/dcache.c | 112 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 28 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 1e124d4673c6..3d3c843c36ed 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,11 +37,19 @@ /* * Usage: - * dcache_hash_lock protects dcache hash table, s_anon lists + * dcache_hash_lock protects: + * - the dcache hash table, s_anon lists + * dcache_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru * * Ordering: * dcache_lock * dentry->d_lock + * dcache_lru_lock * dcache_hash_lock * * if (dentry1 < dentry2) @@ -52,6 +60,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); @@ -154,28 +163,38 @@ static void dentry_iput(struct dentry * dentry) } /* - * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. + * dentry_lru_(add|del|move_tail) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { if (list_empty(&dentry->d_lru)) { + spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); } } +static void __dentry_lru_del(struct dentry *dentry) +{ + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; +} + static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + spin_lock(&dcache_lru_lock); + __dentry_lru_del(dentry); + spin_unlock(&dcache_lru_lock); } } static void dentry_lru_move_tail(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; @@ -183,6 +202,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } + spin_unlock(&dcache_lru_lock); } /** @@ -192,6 +212,8 @@ static void dentry_lru_move_tail(struct dentry *dentry) * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. + * + * dcache_lock and d_lock must be held by caller, are dropped by d_kill. */ static struct dentry *d_kill(struct dentry *dentry) __releases(dentry->d_lock) @@ -383,10 +405,19 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This should be called _only_ with dcache_lock held */ +static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) +{ + atomic_inc(&dentry->d_count); + dentry_lru_del(dentry); + return dentry; +} + static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); + spin_unlock(&dentry->d_lock); return dentry; } @@ -465,7 +496,7 @@ restart: list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -489,7 +520,6 @@ EXPORT_SYMBOL(d_prune_aliases); static void prune_one_dentry(struct dentry * dentry) __releases(dentry->d_lock) __releases(dcache_lock) - __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -498,15 +528,16 @@ static void prune_one_dentry(struct dentry * dentry) * Prune ancestors. Locking is simpler than in dput(), * because dcache_lock needs to be taken anyway. */ - spin_lock(&dcache_lock); while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + spin_lock(&dcache_lock); + if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) { + spin_unlock(&dcache_lock); return; + } dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry); - spin_lock(&dcache_lock); } } @@ -516,21 +547,31 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { dentry = list_entry(list->prev, struct dentry, d_lru); - dentry_lru_del(dentry); + + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + spin_lock(&dcache_lru_lock); + continue; + } + + __dentry_lru_del(dentry); /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count)) { spin_unlock(&dentry->d_lock); continue; } + spin_unlock(&dcache_lru_lock); + prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); + /* dcache_lock and dentry->d_lock dropped */ + spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); } } @@ -551,32 +592,36 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) int cnt = *count; spin_lock(&dcache_lock); +relock: + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { dentry = list_entry(sb->s_dentry_lru.prev, struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + goto relock; + } + /* * If we are honouring the DCACHE_REFERENCED flag and the * dentry has this flag set, don't free it. Clear the flag * and put it back on the LRU. */ - if (flags & DCACHE_REFERENCED) { - spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - cond_resched_lock(&dcache_lock); - continue; - } + if (flags & DCACHE_REFERENCED && + dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + list_move(&dentry->d_lru, &referenced); + spin_unlock(&dentry->d_lock); + } else { + list_move_tail(&dentry->d_lru, &tmp); spin_unlock(&dentry->d_lock); + if (!--cnt) + break; } - - list_move_tail(&dentry->d_lru, &tmp); - if (!--cnt) - break; - cond_resched_lock(&dcache_lock); + /* XXX: re-add cond_resched_lock when dcache_lock goes away */ } *count = cnt; @@ -584,6 +629,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); + spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_lock); } @@ -679,10 +725,12 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(tmp); spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } + spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -701,7 +749,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* detach this root from the system */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); + spin_unlock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dcache_lock); @@ -715,7 +765,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { + spin_lock(&loop->d_lock); dentry_lru_del(loop); + spin_unlock(&loop->d_lock); __d_drop(loop); cond_resched_lock(&dcache_lock); } @@ -892,6 +944,8 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock(&dentry->d_lock); + /* * move only zero ref count dentries to the end * of the unused list for prune_dcache @@ -903,6 +957,8 @@ resume: dentry_lru_del(dentry); } + spin_unlock(&dentry->d_lock); + /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find -- cgit v1.2.3 From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:32 +1100 Subject: fs: dcache scale dentry refcount Make d_count non-atomic and protect it with d_lock. This allows us to ensure a 0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when we start protecting many other dentry members with d_lock. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- fs/autofs4/expire.c | 8 +-- fs/autofs4/root.c | 6 +- fs/ceph/dir.c | 4 +- fs/ceph/inode.c | 4 +- fs/ceph/mds_client.c | 2 +- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 3 +- fs/configfs/inode.c | 2 +- fs/dcache.c | 106 +++++++++++++++++++++++------- fs/ecryptfs/inode.c | 2 +- fs/locks.c | 2 +- fs/namei.c | 2 +- fs/nfs/dir.c | 6 +- fs/nfs/unlink.c | 2 +- fs/nfsd/vfs.c | 5 +- fs/nilfs2/super.c | 2 +- include/linux/dcache.h | 29 ++++---- kernel/cgroup.c | 2 - 21 files changed, 126 insertions(+), 69 deletions(-) (limited to 'fs/dcache.c') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 3532b92de983..29a406a77547 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -162,7 +162,7 @@ static void spufs_prune_dir(struct dentry *dir) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked(dentry); + dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 8c8afc716b98..18aee04a8411 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -280,7 +280,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index f99bddc01716..fe4b242f0094 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -456,7 +456,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a796c9417fb1..413b5642e6cf 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -198,7 +198,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, else ino_count++; - if (atomic_read(&p->d_count) > ino_count) { + if (p->d_count > ino_count) { top_ino->last_used = jiffies; dput(p); return 1; @@ -347,7 +347,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 2; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; /* Can we umount this guy */ @@ -369,7 +369,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, if (!exp_leaves) { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { @@ -383,7 +383,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } else { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d34896cfb19f..7922509b5b2b 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -436,7 +436,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (atomic_read(&active->d_count) == 0) + if (active->d_count == 0) goto next; qstr = &active->d_name; @@ -452,7 +452,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) goto next; if (d_unhashed(active)) { - dget(active); + dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); @@ -507,7 +507,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) goto next; if (d_unhashed(expiring)) { - dget(expiring); + dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d902948a90d8..3ecf915a4550 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -150,7 +150,9 @@ more: di = ceph_dentry(dentry); } - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count++; + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bf1286588f26..bb68c799074d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -879,8 +879,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } else if (realdn) { dout("dn %p (%d) spliced with %p (%d) " "inode %p ino %llx.%llx\n", - dn, atomic_read(&dn->d_count), - realdn, atomic_read(&realdn->d_count), + dn, dn->d_count, + realdn, realdn->d_count, realdn->d_inode, ceph_vinop(realdn->d_inode)); dput(dn); dn = realdn; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 38800eaa81d0..a50fca1e03be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1486,7 +1486,7 @@ retry: *base = ceph_ino(temp->d_inode); *plen = len; dout("build_path on %p %d built %llx '%.*s'\n", - dentry, atomic_read(&dentry->d_count), *base, len, path); + dentry, dentry->d_count, *base, len, path); return path; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4cce3b07d9d7..9e37e8bc9b89 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -559,7 +559,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) if (cii->c_flags & C_FLUSH) coda_flag_inode_children(inode, C_FLUSH); - if (atomic_read(&de->d_count) > 1) + if (de->d_count > 1) /* pretend it's valid, but don't change the flags */ goto out; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 20024a9ef5a7..e9acea440ffc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -394,8 +394,7 @@ static void remove_dir(struct dentry * d) if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); dput(parent); } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 253476d78ed8..79b37765d8ff 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -253,7 +253,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked(dentry); + dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 3d3c843c36ed..81e91502b294 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -45,6 +45,7 @@ * - d_flags * - d_name * - d_lru + * - d_count * * Ordering: * dcache_lock @@ -125,6 +126,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { + BUG_ON(dentry->d_count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -222,8 +224,11 @@ static struct dentry *d_kill(struct dentry *dentry) struct dentry *parent; list_del(&dentry->d_u.d_child); - /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ if (IS_ROOT(dentry)) parent = NULL; else @@ -303,13 +308,23 @@ void dput(struct dentry *dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count == 1) { + if (!spin_trylock(&dcache_lock)) { + /* + * Something of a livelock possibility we could avoid + * by taking dcache_lock and trying again, but we + * want to reduce dcache_lock anyway so this will + * get improved. + */ + spin_unlock(&dentry->d_lock); + goto repeat; + } + } + dentry->d_count--; + if (dentry->d_count) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -389,7 +404,7 @@ int d_invalidate(struct dentry * dentry) * working directory or similar). */ spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -404,29 +419,61 @@ int d_invalidate(struct dentry * dentry) } EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ +/* This must be called with dcache_lock and d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { - atomic_inc(&dentry->d_count); + dentry->d_count++; dentry_lru_del(dentry); return dentry; } +/* This should be called _only_ with dcache_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { - atomic_inc(&dentry->d_count); spin_lock(&dentry->d_lock); - dentry_lru_del(dentry); + __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); return dentry; } +struct dentry * dget_locked_dlock(struct dentry *dentry) +{ + return __dget_locked_dlock(dentry); +} + struct dentry * dget_locked(struct dentry *dentry) { return __dget_locked(dentry); } EXPORT_SYMBOL(dget_locked); +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + spin_lock(&dentry->d_lock); + ret = dentry->d_parent; + if (!ret) + goto out; + if (dentry == ret) { + ret->d_count++; + goto out; + } + if (!spin_trylock(&ret->d_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto repeat; + } + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + spin_unlock(&dentry->d_lock); + return ret; +} +EXPORT_SYMBOL(dget_parent); + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -495,7 +542,7 @@ restart: spin_lock(&dcache_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -530,7 +577,10 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) { + spin_lock(&dentry->d_lock); + dentry->d_count--; + if (dentry->d_count) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; } @@ -562,7 +612,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { spin_unlock(&dentry->d_lock); continue; } @@ -783,7 +833,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -792,7 +842,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -802,7 +852,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) parent = NULL; else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + spin_unlock(&parent->d_lock); } list_del(&dentry->d_u.d_child); @@ -853,7 +905,9 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_empty(&sb->s_anon)) { @@ -950,7 +1004,7 @@ resume: * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_move_tail(dentry); found++; } else { @@ -1068,7 +1122,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); + dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); dentry->d_inode = NULL; @@ -1556,7 +1610,7 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) goto next; } - atomic_inc(&dentry->d_count); + dentry->d_count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1653,7 +1707,7 @@ void d_delete(struct dentry * dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { + if (dentry->d_count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); @@ -2494,11 +2548,15 @@ resume: this_parent = dentry; goto repeat; } - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); } if (this_parent != root) { next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); + spin_lock(&this_parent->d_lock); + this_parent->d_count--; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; goto resume; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a1ed7a7cb173..5e5c7ec1fc98 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, ecryptfs_dentry->d_parent)); lower_inode = lower_dentry->d_inode; fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); - BUG_ON(!atomic_read(&lower_dentry->d_count)); + BUG_ON(!lower_dentry->d_count); ecryptfs_set_dentry_private(ecryptfs_dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL)); diff --git a/fs/locks.c b/fs/locks.c index 8729347bcd1a..08415b2a6d36 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; if ((arg == F_WRLCK) - && ((atomic_read(&dentry->d_count) > 1) + && ((dentry->d_count > 1) || (atomic_read(&inode->i_count) > 1))) goto out; } diff --git a/fs/namei.c b/fs/namei.c index f3b5ca404659..cbfa5fb31072 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2133,7 +2133,7 @@ void dentry_unhash(struct dentry *dentry) shrink_dcache_parent(dentry); spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) == 2) + if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9184c7c80f78..12de824edb5c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1720,7 +1720,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ @@ -1868,7 +1868,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + new_dentry->d_count); /* * For non-directories, check whether the target is busy and if so, @@ -1886,7 +1886,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, rehash = new_dentry; } - if (atomic_read(&new_dentry->d_count) > 2) { + if (new_dentry->d_count > 2) { int err; /* copy the target dentry's name */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7bdec8531400..8fe9eb47a97f 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); + dentry->d_count); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 184938fcff04..3a359023c9f7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { + ((odentry->d_count > 1) || (ndentry->d_count > 1))) { host_err = -EPERM; goto out_dput_new; } @@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { + (rdentry->d_count > 1)) { host_err = -EPERM; } else #endif diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d3..d36fc7ee615f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -838,7 +838,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, static int nilfs_tree_was_touched(struct dentry *root_dentry) { - return atomic_read(&root_dentry->d_count) > 1; + return root_dentry->d_count > 1; } /** diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2feb624b67f1..b0ade2d46805 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -87,7 +87,7 @@ full_name_hash(const unsigned char *name, unsigned int len) #endif struct dentry { - atomic_t d_count; + unsigned int d_count; /* protected by d_lock */ unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ int d_mounted; @@ -297,17 +297,28 @@ extern char *dentry_path(struct dentry *, char *, int); * needs and they take necessary precautions) you should hold dcache_lock * and call dget_locked() instead of dget(). */ - +static inline struct dentry *dget_dlock(struct dentry *dentry) +{ + if (dentry) { + BUG_ON(!dentry->d_count); + dentry->d_count++; + } + return dentry; +} static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - BUG_ON(!atomic_read(&dentry->d_count)); - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); } return dentry; } extern struct dentry * dget_locked(struct dentry *); +extern struct dentry * dget_locked_dlock(struct dentry *); + +extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed @@ -338,16 +349,6 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -static inline struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - - spin_lock(&dentry->d_lock); - ret = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - return ret; -} - extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 746055b214d7..eb7af39350c6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3655,9 +3655,7 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); - spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); -- cgit v1.2.3 From da5029563a0a026c64821b09e8e7b4fd81d3fe1b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:33 +1100 Subject: fs: dcache scale d_unhashed Protect d_unhashed(dentry) condition with d_lock. This means keeping DCACHE_UNHASHED bit in synch with hash manipulations. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 3 ++ drivers/usb/core/inode.c | 3 ++ fs/autofs4/autofs_i.h | 13 ------ fs/autofs4/expire.c | 21 ++++++--- fs/ceph/dir.c | 5 ++- fs/configfs/configfs_internal.h | 2 + fs/dcache.c | 74 +++++++++++++++++++++---------- fs/libfs.c | 29 ++++++++---- fs/ocfs2/dcache.c | 5 ++- security/tomoyo/realpath.c | 1 + 10 files changed, 102 insertions(+), 54 deletions(-) (limited to 'fs/dcache.c') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 29a406a77547..5aef1a7f5e4b 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,6 +166,9 @@ static void spufs_prune_dir(struct dentry *dir) __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); + /* XXX: what is dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ spin_unlock(&dcache_lock); dput(dentry); } else { diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index b690aa35df9a..e3ab4437ea96 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -347,10 +347,13 @@ static int usbfs_empty (struct dentry *dentry) list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); + spin_lock(&de->d_lock); if (usbfs_positive(de)) { + spin_unlock(&de->d_lock); spin_unlock(&dcache_lock); return 0; } + spin_unlock(&de->d_lock); } spin_unlock(&dcache_lock); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3d283abf67d7..3912dcf047e5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -254,19 +254,6 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } -static inline int __simple_empty(struct dentry *dentry) -{ - struct dentry *child; - int ret = 0; - - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) - goto out; - ret = 1; -out: - return ret; -} - static inline void autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 413b5642e6cf..ee6402050f13 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -160,14 +160,18 @@ static int autofs4_tree_busy(struct vfsmount *mnt, spin_lock(&dcache_lock); for (p = top; p; p = next_dentry(p, top)) { + spin_lock(&p->d_lock); /* Negative dentry - give up */ - if (!simple_positive(p)) + if (!simple_positive(p)) { + spin_unlock(&p->d_lock); continue; + } DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); + p = dget_dlock(p); + spin_unlock(&p->d_lock); spin_unlock(&dcache_lock); /* @@ -228,14 +232,18 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, spin_lock(&dcache_lock); for (p = parent; p; p = next_dentry(p, parent)) { + spin_lock(&p->d_lock); /* Negative dentry - give up */ - if (!simple_positive(p)) + if (!simple_positive(p)) { + spin_unlock(&p->d_lock); continue; + } DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); + p = dget_dlock(p); + spin_unlock(&p->d_lock); spin_unlock(&dcache_lock); if (d_mountpoint(p)) { @@ -324,12 +332,15 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ + spin_lock(&dentry->d_lock); if (!simple_positive(dentry)) { next = next->next; + spin_unlock(&dentry->d_lock); continue; } - dentry = dget(dentry); + dentry = dget_dlock(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_lock(&sbi->fs_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3ecf915a4550..571f270dca0f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -136,6 +136,7 @@ more: fi->at_end = 1; goto out_unlock; } + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && @@ -145,13 +146,13 @@ more: dentry->d_name.len, dentry->d_name.name, di->offset, filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", !dentry->d_inode ? " null" : ""); + spin_unlock(&dentry->d_lock); p = p->prev; dentry = list_entry(p, struct dentry, d_u.d_child); di = ceph_dentry(dentry); } - spin_lock(&dentry->d_lock); - dentry->d_count++; + dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da6061a6df40..e58b4c30e216 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -121,6 +121,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry struct config_item * item = NULL; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_ITEM_LINK) { @@ -129,6 +130,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry } else item = config_item_get(sd->s_element); } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return item; diff --git a/fs/dcache.c b/fs/dcache.c index 81e91502b294..ee127f9ab274 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -46,6 +46,7 @@ * - d_name * - d_lru * - d_count + * - d_unhashed() * * Ordering: * dcache_lock @@ -53,6 +54,13 @@ * dcache_lru_lock * dcache_hash_lock * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: * if (dentry1 < dentry2) * dentry1->d_lock * dentry2->d_lock @@ -379,7 +387,9 @@ int d_invalidate(struct dentry * dentry) * If it's already been dropped, return OK. */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } @@ -388,9 +398,11 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -403,7 +415,6 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); @@ -490,35 +501,44 @@ EXPORT_SYMBOL(dget_parent); * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; @@ -801,8 +821,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); - spin_unlock(&dentry->d_lock); __d_drop(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); for (;;) { @@ -817,8 +837,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) d_u.d_child) { spin_lock(&loop->d_lock); dentry_lru_del(loop); - spin_unlock(&loop->d_lock); __d_drop(loop); + spin_unlock(&loop->d_lock); cond_resched_lock(&dcache_lock); } spin_unlock(&dcache_lock); @@ -1863,7 +1883,10 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) /* * XXXX: do we really need to take target->d_lock? */ - if (target < dentry) { + if (d_ancestor(dentry, target)) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); + } else if (d_ancestor(target, dentry) || target < dentry) { spin_lock(&target->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { @@ -2542,13 +2565,16 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (d_unhashed(dentry) || !dentry->d_inode) { + spin_unlock(&dentry->d_lock); continue; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); this_parent = dentry; goto repeat; } - spin_lock(&dentry->d_lock); dentry->d_count--; spin_unlock(&dentry->d_lock); } diff --git a/fs/libfs.c b/fs/libfs.c index b9d25d83e228..433e7139c23a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,11 @@ #include +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -100,8 +105,10 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) while (n && p != &file->f_path.dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (!d_unhashed(next) && next->d_inode) + spin_lock(&next->d_lock); + if (simple_positive(next)) n--; + spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_u.d_child, p); @@ -155,9 +162,13 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (d_unhashed(next) || !next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); continue; + } + spin_unlock(&next->d_lock); spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, @@ -259,20 +270,20 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den return 0; } -static inline int simple_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(child)) { + spin_unlock(&child->d_lock); goto out; + } + spin_unlock(&child->d_lock); + } ret = 1; out: spin_unlock(&dcache_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 895532ac4d98..35e5f5a9ef59 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -174,13 +174,16 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); + spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked(dentry); + dget_locked_dlock(dentry); + spin_unlock(&dentry->d_lock); break; } + spin_unlock(&dentry->d_lock); dentry = NULL; } diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 1d0bf8fa1922..d1e05b047715 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -14,6 +14,7 @@ #include #include #include "common.h" +#include "../../fs/internal.h" /** * tomoyo_encode: Convert binary string to ascii string. -- cgit v1.2.3 From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:34 +1100 Subject: fs: dcache scale subdirs Protect d_subdirs and d_child with d_lock, except in filesystems that aren't using dcache_lock for these anyway (eg. using i_mutex). Note: if we change the locking rule in future so that ->d_child protection is provided only with ->d_parent->d_lock, it may allow us to reduce some locking. But it would be an exception to an otherwise regular locking scheme, so we'd have to see some good results. Probably not worthwhile. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 + drivers/usb/core/inode.c | 8 +- fs/autofs4/autofs_i.h | 11 ++ fs/autofs4/expire.c | 127 ++++++++++----------- fs/autofs4/root.c | 18 ++- fs/ceph/dir.c | 6 +- fs/ceph/inode.c | 8 +- fs/coda/cache.c | 2 + fs/dcache.c | 248 +++++++++++++++++++++++++++++------------- fs/libfs.c | 24 +++- fs/ncpfs/dir.c | 3 + fs/ncpfs/ncplib_kernel.h | 4 + fs/notify/fsnotify.c | 4 +- include/linux/dcache.h | 1 + kernel/cgroup.c | 19 +++- security/selinux/selinuxfs.c | 12 +- 16 files changed, 339 insertions(+), 160 deletions(-) (limited to 'fs/dcache.c') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 0beded260b00..920434b6c071 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -63,6 +63,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -70,6 +71,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) smb_age_dentry(server, dentry); next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } @@ -97,6 +99,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); @@ -111,6 +114,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } dent = NULL; out_unlock: + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e3ab4437ea96..89a0e8366585 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -344,18 +344,20 @@ static int usbfs_empty (struct dentry *dentry) struct list_head *list; spin_lock(&dcache_lock); - + spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); - spin_lock(&de->d_lock); + + spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED); if (usbfs_positive(de)) { spin_unlock(&de->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } - + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 1; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3912dcf047e5..9d2ae9b30d9f 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -254,6 +254,17 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } +static inline void __autofs4_add_expiring(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + } + return; +} + static inline void autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index ee6402050f13..968c1434af62 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -91,24 +91,64 @@ done: } /* - * Calculate next entry in top down tree traversal. - * From next_mnt in namespace.c - elegant. + * Calculate and dget next entry in top down tree traversal. */ -static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +static struct dentry *get_next_positive_dentry(struct dentry *prev, + struct dentry *root) { - struct list_head *next = p->d_subdirs.next; + struct list_head *next; + struct dentry *p, *ret; + + if (prev == NULL) + return dget(prev); + spin_lock(&dcache_lock); +relock: + p = prev; + spin_lock(&p->d_lock); +again: + next = p->d_subdirs.next; if (next == &p->d_subdirs) { while (1) { - if (p == root) + struct dentry *parent; + + if (p == root) { + spin_unlock(&p->d_lock); + spin_unlock(&dcache_lock); + dput(prev); return NULL; + } + + parent = p->d_parent; + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&p->d_lock); + cpu_relax(); + goto relock; + } + spin_unlock(&p->d_lock); next = p->d_u.d_child.next; - if (next != &p->d_parent->d_subdirs) + p = parent; + if (next != &parent->d_subdirs) break; - p = p->d_parent; } } - return list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_u.d_child); + + spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); + /* Negative dentry - try next */ + if (!simple_positive(ret)) { + spin_unlock(&ret->d_lock); + p = ret; + goto again; + } + dget_dlock(ret); + spin_unlock(&ret->d_lock); + spin_unlock(&p->d_lock); + spin_unlock(&dcache_lock); + + dput(prev); + + return ret; } /* @@ -158,22 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - spin_lock(&dcache_lock); - for (p = top; p; p = next_dentry(p, top)) { - spin_lock(&p->d_lock); - /* Negative dentry - give up */ - if (!simple_positive(p)) { - spin_unlock(&p->d_lock); - continue; - } - + p = NULL; + while ((p = get_next_positive_dentry(p, top))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget_dlock(p); - spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); - /* * Is someone visiting anywhere in the subtree ? * If there's no mount we need to check the usage @@ -208,10 +237,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, return 1; } } - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); /* Timeout of a tree mount is ultimately determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) @@ -230,36 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); - spin_lock(&dcache_lock); - for (p = parent; p; p = next_dentry(p, parent)) { - spin_lock(&p->d_lock); - /* Negative dentry - give up */ - if (!simple_positive(p)) { - spin_unlock(&p->d_lock); - continue; - } - + p = NULL; + while ((p = get_next_positive_dentry(p, parent))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget_dlock(p); - spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); - if (d_mountpoint(p)) { /* Can we umount this guy */ if (autofs4_mount_busy(mnt, p)) - goto cont; + continue; /* Can we expire this guy */ if (autofs4_can_expire(p, timeout, do_now)) return p; } -cont: - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); return NULL; } @@ -310,8 +321,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, { unsigned long timeout; struct dentry *root = sb->s_root; + struct dentry *dentry; struct dentry *expired = NULL; - struct list_head *next; int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; struct autofs_info *ino; @@ -323,26 +334,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, now = jiffies; timeout = sbi->exp_timeout; - spin_lock(&dcache_lock); - next = root->d_subdirs.next; - - /* On exit from the loop expire is set to a dgot dentry - * to expire or it's NULL */ - while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - - /* Negative dentry - give up */ - spin_lock(&dentry->d_lock); - if (!simple_positive(dentry)) { - next = next->next; - spin_unlock(&dentry->d_lock); - continue; - } - - dentry = dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - + dentry = NULL; + while ((dentry = get_next_positive_dentry(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); @@ -405,11 +398,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } next: spin_unlock(&sbi->fs_lock); - dput(dentry); - spin_lock(&dcache_lock); - next = next->next; } - spin_unlock(&dcache_lock); return NULL; found: @@ -420,7 +409,11 @@ found: init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); spin_lock(&dcache_lock); + spin_lock(&expired->d_parent->d_lock); + spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + spin_unlock(&expired->d_lock); + spin_unlock(&expired->d_parent->d_lock); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7922509b5b2b..7a9ed6b88291 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -143,10 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * it. */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return -ENOENT; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); out: @@ -253,7 +256,9 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); goto follow; @@ -266,6 +271,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) */ if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); @@ -275,6 +281,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); follow: @@ -347,10 +354,12 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Check for a non-mountpoint directory with no contents */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* The daemon never causes a mount to trigger */ @@ -367,6 +376,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 1; @@ -776,12 +786,16 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) return -EACCES; spin_lock(&dcache_lock); + spin_lock(&sbi->lookup_lock); + spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); return -ENOTEMPTY; } - autofs4_add_expiring(dentry); - spin_lock(&dentry->d_lock); + __autofs4_add_expiring(dentry); + spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 571f270dca0f..2c924e8d85fe 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -113,6 +113,7 @@ static int __dcache_readdir(struct file *filp, last); spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); /* start at beginning? */ if (filp->f_pos == 2 || last == NULL || @@ -136,7 +137,7 @@ more: fi->at_end = 1; goto out_unlock; } - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && @@ -154,6 +155,7 @@ more: dget_dlock(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, @@ -188,10 +190,12 @@ more: } spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; out_unlock: + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); out: if (last) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bb68c799074d..2c6944473366 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -842,11 +842,13 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_unlock(&inode->i_lock); spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&dir->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); + spin_unlock(&dir->d_lock); spin_unlock(&dcache_lock); } @@ -1232,9 +1234,11 @@ retry_lookup: } else { /* reorder parent's d_subdirs */ spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 9060f08e70cf..859393fca2b7 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -94,6 +94,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct dentry *de; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { de = list_entry(child, struct dentry, d_u.d_child); @@ -102,6 +103,7 @@ static void coda_flag_children(struct dentry *parent, int flag) continue; coda_flag_inode(de->d_inode, flag); } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; } diff --git a/fs/dcache.c b/fs/dcache.c index ee127f9ab274..a661247a20d5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -47,6 +47,8 @@ * - d_lru * - d_count * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent * * Ordering: * dcache_lock @@ -223,24 +225,22 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock must be held by caller, are dropped by d_kill. + * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and + * are dropped by d_kill. */ -static struct dentry *d_kill(struct dentry *dentry) +static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { - struct dentry *parent; - list_del(&dentry->d_u.d_child); + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); /* * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. */ - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; d_free(dentry); return parent; } @@ -312,6 +312,7 @@ EXPORT_SYMBOL(d_drop); void dput(struct dentry *dentry) { + struct dentry *parent; if (!dentry) return; @@ -319,6 +320,10 @@ repeat: if (dentry->d_count == 1) might_sleep(); spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; if (dentry->d_count == 1) { if (!spin_trylock(&dcache_lock)) { /* @@ -330,10 +335,17 @@ repeat: spin_unlock(&dentry->d_lock); goto repeat; } + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + goto repeat; + } } dentry->d_count--; if (dentry->d_count) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; } @@ -355,6 +367,8 @@ repeat: dentry_lru_add(dentry); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; @@ -363,7 +377,7 @@ unhash_it: kill_it: /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); if (dentry) goto repeat; } @@ -584,12 +598,13 @@ EXPORT_SYMBOL(d_prune_aliases); * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. */ -static void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); /* * Prune ancestors. Locking is simpler than in dput(), @@ -597,9 +612,20 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } dentry->d_count--; if (dentry->d_count) { + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -607,7 +633,7 @@ static void prune_one_dentry(struct dentry * dentry) dentry_lru_del(dentry); __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); } } @@ -616,29 +642,40 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry; while (!list_empty(list)) { + struct dentry *parent; + dentry = list_entry(list->prev, struct dentry, d_lru); if (!spin_trylock(&dentry->d_lock)) { +relock: spin_unlock(&dcache_lru_lock); cpu_relax(); spin_lock(&dcache_lru_lock); continue; } - __dentry_lru_del(dentry); - /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ if (dentry->d_count) { + __dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto relock; + } + __dentry_lru_del(dentry); spin_unlock(&dcache_lru_lock); - prune_one_dentry(dentry); + prune_one_dentry(dentry, parent); /* dcache_lock and dentry->d_lock dropped */ spin_lock(&dcache_lock); spin_lock(&dcache_lru_lock); @@ -833,14 +870,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock(&loop->d_lock); + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); __d_drop(loop); spin_unlock(&loop->d_lock); - cond_resched_lock(&dcache_lock); } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* move to the first child */ @@ -868,16 +907,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; spin_lock(&parent->d_lock); parent->d_count--; + list_del(&dentry->d_u.d_child); spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -958,6 +998,7 @@ int have_submounts(struct dentry *parent) spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -965,22 +1006,34 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { next = this_parent->d_u.d_child.next; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return 0; /* No mount points found in tree */ positive: @@ -1010,6 +1063,7 @@ static int select_parent(struct dentry * parent) int found = 0; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -1017,8 +1071,9 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + BUG_ON(this_parent == dentry); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* * move only zero ref count dentries to the end @@ -1031,33 +1086,44 @@ resume: dentry_lru_del(dentry); } - spin_unlock(&dentry->d_lock); - /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { + struct dentry *tmp; next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + tmp = this_parent->d_parent; + spin_unlock(&this_parent->d_lock); + BUG_ON(tmp == this_parent); + this_parent = tmp; + spin_lock(&this_parent->d_lock); goto resume; } out: + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return found; } @@ -1155,18 +1221,19 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - dentry->d_parent = dget(parent); + spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } - - spin_lock(&dcache_lock); - if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); + } this_cpu_inc(nr_dentry); @@ -1684,13 +1751,18 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dcache_lock); + spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { - __dget_locked(dentry); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + __dget_locked_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 1; } } + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 0; @@ -1802,17 +1874,6 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) } EXPORT_SYMBOL(dentry_update_name_case); -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch. - * - * Note that we have to be a lot more careful about getting the hash - * switched - we have to switch the hash value properly even if it - * then no longer matches the actual (corrupted) string of the target. - * The hash value has to match the hash queue that the dentry is on.. - */ static void switch_names(struct dentry *dentry, struct dentry *target) { if (dname_external(target)) { @@ -1854,18 +1915,53 @@ static void switch_names(struct dentry *dentry, struct dentry *target) swap(dentry->d_name.len, target->d_name.len); } +static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +{ + /* + * XXXX: do we really need to take target->d_lock? + */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { + if (d_ancestor(dentry->d_parent, target->d_parent)) { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + } + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } +} + +static void dentry_unlock_parents_for_move(struct dentry *dentry, + struct dentry *target) +{ + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); +} + /* - * We cannibalize "target" when moving dentry on top of it, - * because it's going to be thrown away anyway. We could be more - * polite about it, though. - * - * This forceful removal will result in ugly /proc output if - * somebody holds a file open that got deleted due to a rename. - * We could be nicer about the deleted file, and let it show - * up under the name it had before it was deleted rather than - * under the original name of the file that was moved on top of it. + * When switching names, the actual string doesn't strictly have to + * be preserved in the target - because we're dropping the target + * anyway. As such, we can just do a simple memcpy() to copy over + * the new name before we switch. + * + * Note that we have to be a lot more careful about getting the hash + * switched - we have to switch the hash value properly even if it + * then no longer matches the actual (corrupted) string of the target. + * The hash value has to match the hash queue that the dentry is on.. */ - /* * d_move_locked - move a dentry * @dentry: entry to move @@ -1879,20 +1975,12 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + BUG_ON(d_ancestor(dentry, target)); + BUG_ON(d_ancestor(target, dentry)); + write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? - */ - if (d_ancestor(dentry, target)) { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } else if (d_ancestor(target, dentry) || target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } + + dentry_lock_for_move(dentry, target); /* Move the dentry to the target hash queue, if on different bucket */ spin_lock(&dcache_hash_lock); @@ -1924,6 +2012,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2013,17 +2103,20 @@ out_err: /* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. + * returns with anon->d_lock held! */ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) { struct dentry *dparent, *aparent; - switch_names(dentry, anon); - swap(dentry->d_name.hash, anon->d_name.hash); + dentry_lock_for_move(anon, dentry); dparent = dentry->d_parent; aparent = anon->d_parent; + switch_names(dentry, anon); + swap(dentry->d_name.hash, anon->d_name.hash); + dentry->d_parent = (aparent == anon) ? dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -2038,6 +2131,10 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + dentry_unlock_parents_for_move(anon, dentry); + spin_unlock(&dentry->d_lock); + + /* anon->d_lock still locked, returns locked */ anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -2073,7 +2170,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) /* Is this an anonymous mountpoint that we could splice * into our tree? */ if (IS_ROOT(alias)) { - spin_lock(&alias->d_lock); __d_materialise_dentry(dentry, alias); __d_drop(alias); goto found; @@ -2558,6 +2654,7 @@ void d_genocide(struct dentry *root) struct list_head *next; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2571,8 +2668,10 @@ resume: continue; } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } dentry->d_count--; @@ -2580,12 +2679,13 @@ resume: } if (this_parent != root) { next = this_parent->d_u.d_child.next; - spin_lock(&this_parent->d_lock); this_parent->d_count--; spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/libfs.c b/fs/libfs.c index 433e7139c23a..cc4794914b52 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -81,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = file->f_path.dentry; + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -89,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -100,22 +101,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) loff_t n = file->f_pos - 2; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + /* d_lock not required for cursor */ list_del(&cursor->d_u.d_child); - p = file->f_path.dentry->d_subdirs.next; - while (n && p != &file->f_path.dentry->d_subdirs) { + p = dentry->d_subdirs.next; + while (n && p != &dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - spin_lock(&next->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_u.d_child, p); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } } - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -156,6 +160,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) /* fallthrough */ default: spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); @@ -169,6 +174,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) } spin_unlock(&next->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, @@ -176,11 +182,15 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) dt_type(next->d_inode)) < 0) return 0; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ list_move(q, p); + spin_unlock(&next->d_lock); p = q; filp->f_pos++; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } return 0; @@ -276,6 +286,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { @@ -286,6 +297,7 @@ int simple_empty(struct dentry *dentry) } ret = 1; out: + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return ret; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index bbbf7922f422..102278ed38bd 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -392,6 +392,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); @@ -400,11 +401,13 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dget_locked(dent); else dent = NULL; + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); goto out; } next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return NULL; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 244d1b73fda7..c4b718ff9a6b 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -194,6 +194,7 @@ ncp_renew_dentries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -205,6 +206,7 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } @@ -216,6 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -223,6 +226,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) ncp_age_dentry(server, dentry); next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 20dc218707ca..aa4f25e803f6 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -68,17 +68,19 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ + spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } + spin_unlock(&alias->d_lock); } spin_unlock(&dcache_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b0ade2d46805..ddf4f55624f7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -305,6 +305,7 @@ static inline struct dentry *dget_dlock(struct dentry *dentry) } return dentry; } + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index eb7af39350c6..7b4705b51d4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -877,23 +877,31 @@ static void cgroup_clear_directory(struct dentry *dentry) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - d = dget_locked(d); + dget_locked_dlock(d); + spin_unlock(&d->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); spin_lock(&dcache_lock); - } + spin_lock(&dentry->d_lock); + } else + spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } @@ -902,10 +910,17 @@ static void cgroup_clear_directory(struct dentry *dentry) */ static void cgroup_d_remove_dir(struct dentry *dentry) { + struct dentry *parent; + cgroup_clear_directory(dentry); spin_lock(&dcache_lock); + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); remove_dir(dentry); } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 073fd5b0a53a..017ec096446e 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1146,22 +1146,30 @@ static void sel_remove_entries(struct dentry *de) struct list_head *node; spin_lock(&dcache_lock); + spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { - d = dget_locked(d); + dget_locked_dlock(d); + spin_unlock(&de->d_lock); + spin_unlock(&d->d_lock); spin_unlock(&dcache_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); spin_lock(&dcache_lock); - } + spin_lock(&de->d_lock); + } else + spin_unlock(&d->d_lock); node = de->d_subdirs.next; } + spin_unlock(&de->d_lock); spin_unlock(&dcache_lock); } -- cgit v1.2.3 From b23fb0a60379a95e10c671f646b259ea2558421e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:35 +1100 Subject: fs: scale inode alias list Add a new lock, dcache_inode_lock, to protect the inode's i_dentry list from concurrent modification. d_alias is also protected by d_lock. Signed-off-by: Nick Piggin --- fs/9p/vfs_inode.c | 2 ++ fs/affs/amigaffs.c | 2 ++ fs/cifs/inode.c | 3 +++ fs/dcache.c | 66 ++++++++++++++++++++++++++++++++++++++++++++------ fs/exportfs/expfs.c | 4 +++ fs/nfs/getroot.c | 4 +++ fs/notify/fsnotify.c | 2 ++ fs/ocfs2/dcache.c | 3 ++- include/linux/dcache.h | 1 + 9 files changed, 78 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 34bf71b56542..47dfd5d29a6b 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -271,9 +271,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return dentry; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 7d0f0a30f7a3..2321cc92d44f 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -129,6 +129,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) struct list_head *head, *next; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); head = &inode->i_dentry; next = head->next; while (next != head) { @@ -139,6 +140,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) } next = next->next; } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 589f3e3f6e00..003698365ece 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -810,12 +810,15 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return true; } } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return false; } diff --git a/fs/dcache.c b/fs/dcache.c index a661247a20d5..de38680ee0ed 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,6 +37,8 @@ /* * Usage: + * dcache_inode_lock protects: + * - i_dentry, d_alias, d_inode * dcache_hash_lock protects: * - the dcache hash table, s_anon lists * dcache_lru_lock protects: @@ -49,12 +51,14 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent + * - d_alias, d_inode * * Ordering: * dcache_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_lock + * dcache_inode_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -70,11 +74,13 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +EXPORT_SYMBOL(dcache_inode_lock); EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -154,6 +160,7 @@ static void d_free(struct dentry *dentry) */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { struct inode *inode = dentry->d_inode; @@ -161,6 +168,7 @@ static void dentry_iput(struct dentry * dentry) dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); @@ -170,6 +178,7 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } } @@ -231,6 +240,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { list_del(&dentry->d_u.d_child); @@ -332,13 +342,18 @@ repeat: * want to reduce dcache_lock anyway so this will * get improved. */ +drop1: spin_unlock(&dentry->d_lock); goto repeat; } - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); + if (!spin_trylock(&dcache_inode_lock)) { +drop2: spin_unlock(&dcache_lock); - goto repeat; + goto drop1; + } + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto drop2; } } dentry->d_count--; @@ -369,6 +384,7 @@ repeat: spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return; @@ -558,7 +574,9 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); de = __d_find_alias(inode, 0); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } return de; @@ -574,18 +592,21 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -601,6 +622,7 @@ EXPORT_SYMBOL(d_prune_aliases); static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { __d_drop(dentry); @@ -612,6 +634,7 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) */ while (dentry) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); again: spin_lock(&dentry->d_lock); if (IS_ROOT(dentry)) @@ -627,6 +650,7 @@ again: if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return; } @@ -676,8 +700,9 @@ relock: spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); - /* dcache_lock and dentry->d_lock dropped */ + /* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } } @@ -699,6 +724,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) int cnt = *count; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -737,8 +763,8 @@ relock: if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); - } /** @@ -832,12 +858,14 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(tmp); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } spin_unlock(&dcache_lru_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -1255,9 +1283,11 @@ EXPORT_SYMBOL(d_alloc_name); /* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + spin_lock(&dentry->d_lock); if (inode) list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1280,7 +1310,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); __d_instantiate(entry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } @@ -1341,7 +1373,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (!result) { @@ -1432,8 +1466,10 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_parent = tmp; /* make sure dput doesn't croak */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); res = __d_find_alias(inode, 0); if (res) { + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); dput(tmp); goto out_iput; @@ -1450,6 +1486,7 @@ struct dentry *d_obtain_alias(struct inode *inode) hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); spin_unlock(&dcache_hash_lock); spin_unlock(&tmp->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return tmp; @@ -1482,9 +1519,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -1492,6 +1531,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) } else { /* already taking dcache_lock, so d_add() by hand */ __d_instantiate(dentry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); @@ -1566,8 +1606,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * already has a dentry. */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; @@ -1579,6 +1621,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); dget_locked(new); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); @@ -1797,6 +1840,7 @@ void d_delete(struct dentry * dentry) * Are we the only user? */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); if (dentry->d_count == 1) { @@ -1810,6 +1854,7 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); @@ -2067,6 +2112,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) __releases(dcache_lock) + __releases(dcache_inode_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -2092,6 +2138,7 @@ out_unalias: d_move_locked(alias, dentry); ret = alias; out_err: + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); @@ -2153,6 +2200,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); if (!inode) { actual = dentry; @@ -2196,6 +2244,7 @@ found: _d_rehash(actual); spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { @@ -2207,6 +2256,7 @@ out_nolock: return actual; shouldnt_be_hashed: + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); BUG(); } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 51b304056f10..84b8c460a781 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -48,8 +48,10 @@ find_acceptable_alias(struct dentry *result, return result; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { dget_locked(dentry); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (toput) dput(toput); @@ -58,8 +60,10 @@ find_acceptable_alias(struct dentry *result, return dentry; } spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); toput = dentry; } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (toput) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index ac7b814ce162..850f67d5f0ac 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -64,7 +64,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * Oops, since the test for IS_ROOT() will fail. */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); + spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); + spin_unlock(&sb->s_root->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } return 0; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index aa4f25e803f6..ae769fc9b66c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -60,6 +60,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) watched = fsnotify_inode_watches_children(inode); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ list_for_each_entry(alias, &inode->i_dentry, d_alias) { @@ -82,6 +83,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } spin_unlock(&alias->d_lock); } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 35e5f5a9ef59..c31b5c647ac7 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -170,7 +170,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct dentry *dentry = NULL; spin_lock(&dcache_lock); - + spin_lock(&dcache_inode_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); @@ -188,6 +188,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, dentry = NULL; } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return dentry; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ddf4f55624f7..bda5ec0b077d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -181,6 +181,7 @@ struct dentry_operations { #define DCACHE_CANT_MOUNT 0x0100 +extern spinlock_t dcache_inode_lock; extern spinlock_t dcache_lock; extern seqlock_t rename_lock; -- cgit v1.2.3 From 9abca36087288fe28de4749c71ca003d4b9e3ed0 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:36 +1100 Subject: fs: increase d_name lock coverage Cover d_name with d_lock in more cases, where there may be concurrent modification to it. Signed-off-by: Nick Piggin --- fs/dcache.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index de38680ee0ed..a09f0771fd27 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1350,6 +1350,11 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ if (qstr->hash != hash) continue; if (alias->d_parent != entry->d_parent) @@ -2313,7 +2318,9 @@ static int prepend_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); + spin_lock(&dentry->d_lock); error = prepend_name(buffer, buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); if (!error) error = prepend(buffer, buflen, "/", 1); if (error) @@ -2515,10 +2522,13 @@ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; + int error; prefetch(parent); - if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || - (prepend(&end, &buflen, "/", 1) != 0)) + spin_lock(&dentry->d_lock); + error = prepend_name(&end, &buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); + if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) goto Elong; retval = end; -- cgit v1.2.3 From 949854d02455080d20cd3e1db28a3a18daf7599d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:37 +1100 Subject: fs: Use rename lock and RCU for multi-step operations The remaining usages for dcache_lock is to allow atomic, multi-step read-side operations over the directory tree by excluding modifications to the tree. Also, to walk in the leaf->root direction in the tree where we don't have a natural d_lock ordering. This could be accomplished by taking every d_lock, but this would mean a huge number of locks and actually gets very tricky. Solve this instead by using the rename seqlock for multi-step read-side operations, retry in case of a rename so we don't walk up the wrong parent. Concurrent dentry insertions are not serialised against. Concurrent deletes are tricky when walking up the directory: our parent might have been deleted when dropping locks so also need to check and retry for that. We can also use the rename lock in cases where livelock is a worry (and it is introduced in subsequent patch). Signed-off-by: Nick Piggin --- drivers/staging/pohmelfs/path_entry.c | 15 +++- fs/autofs4/waitq.c | 18 ++++- fs/dcache.c | 134 ++++++++++++++++++++++++++++------ fs/nfs/namespace.c | 14 +++- include/linux/dcache.h | 1 + 5 files changed, 152 insertions(+), 30 deletions(-) (limited to 'fs/dcache.c') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index 8ec83d2dffb7..bbe42f42ca8f 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -83,10 +83,11 @@ out: int pohmelfs_path_length(struct pohmelfs_inode *pi) { struct dentry *d, *root, *first; - int len = 1; /* Root slash */ + int len; + unsigned seq; - first = d = d_find_alias(&pi->vfs_inode); - if (!d) { + first = d_find_alias(&pi->vfs_inode); + if (!first) { dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); return -ENOENT; } @@ -95,6 +96,11 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) root = dget(current->fs->root.dentry); spin_unlock(¤t->fs->lock); +rename_retry: + len = 1; /* Root slash */ + d = first; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) @@ -105,6 +111,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) d = d->d_parent; } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; dput(root); dput(first); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 2341375386f8..4be8f778a418 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -186,16 +186,25 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, { struct dentry *root = sbi->sb->s_root; struct dentry *tmp; - char *buf = *name; + char *buf; char *p; - int len = 0; - + int len; + unsigned seq; + +rename_retry: + buf = *name; + len = 0; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; } @@ -209,6 +218,9 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, strncpy(p, tmp->d_name.name, tmp->d_name.len); } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return len; } diff --git a/fs/dcache.c b/fs/dcache.c index a09f0771fd27..a9bc4ecc21e1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -80,6 +80,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); EXPORT_SYMBOL(dcache_lock); @@ -243,6 +244,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dcache_inode_lock) __releases(dcache_lock) { + dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); if (parent) spin_unlock(&parent->d_lock); @@ -1017,11 +1019,15 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); spin_lock(&dcache_lock); if (d_mountpoint(parent)) @@ -1055,17 +1061,37 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; /* No mount points found in tree */ positive: spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 1; } EXPORT_SYMBOL(have_submounts); @@ -1086,10 +1112,15 @@ EXPORT_SYMBOL(have_submounts); */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; int found = 0; +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); + spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -1099,7 +1130,6 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - BUG_ON(this_parent == dentry); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1142,17 +1172,32 @@ resume: */ if (this_parent != parent) { struct dentry *tmp; - next = this_parent->d_u.d_child.next; + struct dentry *child; + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - BUG_ON(tmp == this_parent); + child = this_parent; this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return found; } @@ -1654,7 +1699,7 @@ EXPORT_SYMBOL(d_add_ci); struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; - unsigned long seq; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -2290,7 +2335,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the dcache_lock. + * Caller holds the rename_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). @@ -2377,7 +2422,9 @@ char *__d_path(const struct path *path, struct path *root, prepend(&res, &buflen, "\0", 1); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2441,10 +2488,12 @@ char *d_path(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); return res; @@ -2472,10 +2521,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); if (error) @@ -2544,7 +2595,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); return retval; @@ -2557,6 +2610,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2564,6 +2618,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ @@ -2604,6 +2659,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -ENOENT; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; @@ -2612,6 +2668,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2631,8 +2688,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else + } else { + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); + } out: path_put(&pwd); @@ -2660,25 +2719,25 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } @@ -2710,9 +2769,13 @@ EXPORT_SYMBOL(path_is_under); void d_genocide(struct dentry *root) { - struct dentry *this_parent = root; + struct dentry *this_parent; struct list_head *next; + unsigned seq; +rename_retry: + this_parent = root; + seq = read_seqbegin(&rename_lock); spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -2722,6 +2785,7 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (d_unhashed(dentry) || !dentry->d_inode) { spin_unlock(&dentry->d_lock); @@ -2734,19 +2798,43 @@ resume: spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - dentry->d_count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_count--; + } spin_unlock(&dentry->d_lock); } if (this_parent != root) { - next = this_parent->d_u.d_child.next; - this_parent->d_count--; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { + this_parent->d_flags |= DCACHE_GENOCIDE; + this_parent->d_count--; + } + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; } /** diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index db6aa3673cf3..78c0ebb0b07c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -49,11 +49,17 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end = buffer+buflen; + char *end; int namelen; + unsigned seq; +rename_retry: + end = buffer+buflen; *--end = '\0'; buflen--; + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; @@ -66,6 +72,9 @@ char *nfs_path(const char *base, dentry = dentry->d_parent; } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; if (*end != '/') { if (--buflen < 0) goto Elong; @@ -83,6 +92,9 @@ char *nfs_path(const char *base, return end; Elong_unlock: spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; Elong: return ERR_PTR(-ENAMETOOLONG); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bda5ec0b077d..c963ebada922 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,6 +180,7 @@ struct dentry_operations { #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ #define DCACHE_CANT_MOUNT 0x0100 +#define DCACHE_GENOCIDE 0x0200 extern spinlock_t dcache_inode_lock; extern spinlock_t dcache_lock; -- cgit v1.2.3 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 16 +-- Documentation/filesystems/dentry-locking.txt | 40 ++++--- Documentation/filesystems/porting | 8 +- arch/powerpc/platforms/cell/spufs/inode.c | 5 +- drivers/infiniband/hw/ipath/ipath_fs.c | 6 +- drivers/infiniband/hw/qib/qib_fs.c | 3 - drivers/staging/pohmelfs/path_entry.c | 2 - drivers/staging/smbfs/cache.c | 4 - drivers/usb/core/inode.c | 3 - fs/9p/vfs_inode.c | 2 - fs/affs/amigaffs.c | 2 - fs/autofs4/autofs_i.h | 3 + fs/autofs4/expire.c | 10 +- fs/autofs4/root.c | 44 ++++---- fs/autofs4/waitq.c | 7 +- fs/ceph/dir.c | 6 +- fs/ceph/inode.c | 4 - fs/cifs/inode.c | 3 - fs/coda/cache.c | 2 - fs/configfs/configfs_internal.h | 2 - fs/configfs/inode.c | 6 +- fs/dcache.c | 160 ++++----------------------- fs/exportfs/expfs.c | 4 - fs/libfs.c | 8 -- fs/namei.c | 9 +- fs/ncpfs/dir.c | 3 - fs/ncpfs/ncplib_kernel.h | 4 - fs/nfs/dir.c | 3 - fs/nfs/getroot.c | 2 - fs/nfs/namespace.c | 3 - fs/notify/fsnotify.c | 2 - fs/ocfs2/dcache.c | 2 - include/linux/dcache.h | 5 +- include/linux/fs.h | 6 +- include/linux/fsnotify.h | 2 - include/linux/fsnotify_backend.h | 11 +- include/linux/namei.h | 1 - kernel/cgroup.c | 6 - mm/filemap.c | 3 - security/selinux/selinuxfs.c | 4 - 40 files changed, 109 insertions(+), 307 deletions(-) (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a15ee207b449..bdad6414dfa0 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -21,14 +21,14 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); locking rules: - dcache_lock rename_lock ->d_lock may block -d_revalidate: no no no yes -d_hash no no no no -d_compare: no yes no no -d_delete: yes no yes no -d_release: no no no yes -d_iput: no no no yes -d_dname: no no no no + rename_lock ->d_lock may block +d_revalidate: no no yes +d_hash no no no +d_compare: yes no no +d_delete: no yes no +d_release: no no yes +d_iput: no no yes +d_dname: no no no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt index 79334ed5daa7..30b6a40f5650 100644 --- a/Documentation/filesystems/dentry-locking.txt +++ b/Documentation/filesystems/dentry-locking.txt @@ -31,6 +31,7 @@ significant change is the way d_lookup traverses the hash chain, it doesn't acquire the dcache_lock for this and rely on RCU to ensure that the dentry has not been *freed*. +dcache_lock no longer exists, dentry locking is explained in fs/dcache.c Dcache locking details ====================== @@ -50,14 +51,12 @@ Safe lock-free look-up of dcache hash table Dcache is a complex data structure with the hash table entries also linked together in other lists. In 2.4 kernel, dcache_lock protected -all the lists. We applied RCU only on hash chain walking. The rest of -the lists are still protected by dcache_lock. Some of the important -changes are : +all the lists. RCU dentry hash walking works like this: 1. The deletion from hash chain is done using hlist_del_rcu() macro which doesn't initialize next pointer of the deleted dentry and this allows us to walk safely lock-free while a deletion is - happening. + happening. This is a standard hlist_rcu iteration. 2. Insertion of a dentry into the hash table is done using hlist_add_head_rcu() which take care of ordering the writes - the @@ -66,19 +65,18 @@ changes are : which has since been replaced by hlist_for_each_entry_rcu(), while walking the hash chain. The only requirement is that all initialization to the dentry must be done before - hlist_add_head_rcu() since we don't have dcache_lock protection - while traversing the hash chain. This isn't different from the - existing code. - -3. The dentry looked up without holding dcache_lock by cannot be - returned for walking if it is unhashed. It then may have a NULL - d_inode or other bogosity since RCU doesn't protect the other - fields in the dentry. We therefore use a flag DCACHE_UNHASHED to - indicate unhashed dentries and use this in conjunction with a - per-dentry lock (d_lock). Once looked up without the dcache_lock, - we acquire the per-dentry lock (d_lock) and check if the dentry is - unhashed. If so, the look-up is failed. If not, the reference count - of the dentry is increased and the dentry is returned. + hlist_add_head_rcu() since we don't have lock protection + while traversing the hash chain. + +3. The dentry looked up without holding locks cannot be returned for + walking if it is unhashed. It then may have a NULL d_inode or other + bogosity since RCU doesn't protect the other fields in the dentry. We + therefore use a flag DCACHE_UNHASHED to indicate unhashed dentries + and use this in conjunction with a per-dentry lock (d_lock). Once + looked up without locks, we acquire the per-dentry lock (d_lock) and + check if the dentry is unhashed. If so, the look-up is failed. If not, + the reference count of the dentry is increased and the dentry is + returned. 4. Once a dentry is looked up, it must be ensured during the path walk for that component it doesn't go away. In pre-2.5.10 code, this was @@ -86,10 +84,10 @@ changes are : In some sense, dcache_rcu path walking looks like the pre-2.5.10 version. -5. All dentry hash chain updates must take the dcache_lock as well as - the per-dentry lock in that order. dput() does this to ensure that - a dentry that has just been looked up in another CPU doesn't get - deleted before dget() can be done on it. +5. All dentry hash chain updates must take the per-dentry lock (see + fs/dcache.c). This excludes dput() to ensure that a dentry that has + been looked up concurrently does not get deleted before dget() can + take a ref. 6. There are several ways to do reference counting of RCU protected objects. One such example is in ipv4 route cache where deferred diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 9fd31940a8ef..1eb76959d096 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -216,7 +216,6 @@ had ->revalidate()) add calls in ->follow_link()/->readlink(). ->d_parent changes are not protected by BKL anymore. Read access is safe if at least one of the following is true: * filesystem has no cross-directory rename() - * dcache_lock is held * we know that parent had been locked (e.g. we are looking at ->d_parent of ->lookup() argument). * we are called from ->rename(). @@ -340,3 +339,10 @@ look at examples of other filesystems) for guidance. .d_hash() calling convention and locking rules are significantly changed. Read updated documentation in Documentation/filesystems/vfs.txt (and look at examples of other filesystems) for guidance. + +--- +[mandatory] + dcache_lock is gone, replaced by fine grained locks. See fs/dcache.c +for details of what locks to replace dcache_lock with in order to protect +particular things. Most of the time, a filesystem only needs ->d_lock, which +protects *all* the dcache state of a given dentry. diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5aef1a7f5e4b..2662b50ea8d4 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -159,21 +159,18 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); - /* XXX: what is dcache_lock protecting here? Other + /* XXX: what was dcache_lock protecting here? Other * filesystems (IB, configfs) release dcache_lock * before unlink */ - spin_unlock(&dcache_lock); dput(dentry); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } shrink_dcache_parent(dir); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 18aee04a8411..925e88227ded 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else { + } else spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); - } ret = 0; bail: diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fe4b242f0094..49af4a6538ba 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -453,17 +453,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); } ret = 0; diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index bbe42f42ca8f..400a9fc386ad 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -101,7 +101,6 @@ rename_retry: d = first; seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ @@ -110,7 +109,6 @@ rename_retry: len += d->d_name.len + 1; /* Plus slash */ d = d->d_parent; } - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 920434b6c071..75dfd403fb90 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -62,7 +62,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -72,7 +71,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } /* @@ -98,7 +96,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -115,7 +112,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = NULL; out_unlock: spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 89a0e8366585..1b125c224dcf 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -343,7 +343,6 @@ static int usbfs_empty (struct dentry *dentry) { struct list_head *list; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); @@ -352,13 +351,11 @@ static int usbfs_empty (struct dentry *dentry) if (usbfs_positive(de)) { spin_unlock(&de->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 1; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 47dfd5d29a6b..1073bca8488c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -270,13 +270,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return dentry; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 2321cc92d44f..600101a21ba2 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -128,7 +128,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); head = &inode->i_dentry; next = head->next; @@ -141,7 +140,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) next = next->next; } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 9d2ae9b30d9f..0fffe1c24cec 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -16,6 +16,7 @@ #include #include #include +#include #include /* This is the range of ioctl() numbers we claim as ours */ @@ -60,6 +61,8 @@ do { \ current->pid, __func__, ##args); \ } while (0) +extern spinlock_t autofs4_lock; + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 968c1434af62..2f7951d67d16 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -102,7 +102,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, if (prev == NULL) return dget(prev); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); relock: p = prev; spin_lock(&p->d_lock); @@ -114,7 +114,7 @@ again: if (p == root) { spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); dput(prev); return NULL; } @@ -144,7 +144,7 @@ again: dget_dlock(ret); spin_unlock(&ret->d_lock); spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); dput(prev); @@ -408,13 +408,13 @@ found: ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return expired; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7a9ed6b88291..10ca68a96dc7 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -23,6 +23,8 @@ #include "autofs_i.h" +DEFINE_SPINLOCK(autofs4_lock); + static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -142,15 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * autofs file system so just let the libfs routines handle * it. */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return -ENOENT; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); out: return dcache_dir_open(inode, file); @@ -255,11 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) /* We trigger a mount for almost all flags */ lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); goto follow; } @@ -272,7 +274,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); status = try_to_fill_dentry(dentry, nd->flags); @@ -282,7 +284,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); follow: /* @@ -353,14 +355,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; /* Check for a non-mountpoint directory with no contents */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); /* The daemon never causes a mount to trigger */ if (oz_mode) @@ -377,7 +379,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 1; } @@ -432,7 +434,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -465,14 +467,14 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return active; } next: spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -487,7 +489,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -520,14 +522,14 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return expiring; } next: spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -763,12 +765,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 0; } @@ -785,20 +787,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return -ENOTEMPTY; } __autofs4_add_expiring(dentry); spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 4be8f778a418..c5f8459c905e 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -194,14 +194,15 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, rename_retry: buf = *name; len = 0; + seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; @@ -217,7 +218,7 @@ rename_retry: p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2c924e8d85fe..58abc3da6111 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -112,7 +112,6 @@ static int __dcache_readdir(struct file *filp, dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, last); - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); /* start at beginning? */ @@ -156,7 +155,6 @@ more: dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); @@ -182,21 +180,19 @@ more: filp->f_pos++; - /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ + /* make sure a dentry wasn't dropped while we didn't have parent lock */ if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); err = -EAGAIN; goto out; } - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; out_unlock: spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); out: if (last) dput(last); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2c6944473366..2a48cafc174b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -841,7 +841,6 @@ static void ceph_set_dentry_offset(struct dentry *dn) di->offset = ceph_inode(inode)->i_max_offset++; spin_unlock(&inode->i_lock); - spin_lock(&dcache_lock); spin_lock(&dir->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &dir->d_subdirs); @@ -849,7 +848,6 @@ static void ceph_set_dentry_offset(struct dentry *dn) dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); spin_unlock(&dir->d_lock); - spin_unlock(&dcache_lock); } /* @@ -1233,13 +1231,11 @@ retry_lookup: goto retry_lookup; } else { /* reorder parent's d_subdirs */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } di = dn->d_fsdata; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 003698365ece..99b9a2cc14b7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -809,17 +809,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return true; } } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return false; } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 859393fca2b7..5525e1c660fd 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,6 @@ static void coda_flag_children(struct dentry *parent, int flag) struct list_head *child; struct dentry *de; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { @@ -104,7 +103,6 @@ static void coda_flag_children(struct dentry *parent, int flag) coda_flag_inode(de->d_inode, flag); } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return; } diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index e58b4c30e216..026cf68553a4 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -120,7 +120,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry { struct config_item * item = NULL; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; @@ -131,7 +130,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry item = config_item_get(sd->s_element); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return item; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 79b37765d8ff..fb3a55fff82a 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) struct dentry * dentry = sd->s_dentry; if (dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); - } else { + } else spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } } } diff --git a/fs/dcache.c b/fs/dcache.c index a9bc4ecc21e1..0dbae053b664 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -54,11 +54,10 @@ * - d_alias, d_inode * * Ordering: - * dcache_lock - * dcache_inode_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_lock + * dcache_inode_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -77,12 +76,10 @@ EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); -EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -139,7 +136,7 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. + * no locks, please. */ static void d_free(struct dentry *dentry) { @@ -162,7 +159,6 @@ static void d_free(struct dentry *dentry) static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -170,7 +166,6 @@ static void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -180,7 +175,6 @@ static void dentry_iput(struct dentry * dentry) } else { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } } @@ -235,14 +229,13 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and - * are dropped by d_kill. + * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by + * d_kill. */ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); @@ -285,11 +278,9 @@ EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_drop); @@ -337,21 +328,10 @@ repeat: else parent = dentry->d_parent; if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_lock)) { - /* - * Something of a livelock possibility we could avoid - * by taking dcache_lock and trying again, but we - * want to reduce dcache_lock anyway so this will - * get improved. - */ -drop1: - spin_unlock(&dentry->d_lock); - goto repeat; - } if (!spin_trylock(&dcache_inode_lock)) { drop2: - spin_unlock(&dcache_lock); - goto drop1; + spin_unlock(&dentry->d_lock); + goto repeat; } if (parent && !spin_trylock(&parent->d_lock)) { spin_unlock(&dcache_inode_lock); @@ -363,7 +343,6 @@ drop2: spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return; } @@ -387,7 +366,6 @@ drop2: if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; unhash_it: @@ -418,11 +396,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } /* @@ -431,9 +407,7 @@ int d_invalidate(struct dentry * dentry) */ if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } @@ -450,19 +424,17 @@ int d_invalidate(struct dentry * dentry) if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } EXPORT_SYMBOL(d_invalidate); -/* This must be called with dcache_lock and d_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { dentry->d_count++; @@ -470,7 +442,7 @@ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) return dentry; } -/* This should be called _only_ with dcache_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { spin_lock(&dentry->d_lock); @@ -575,11 +547,9 @@ struct dentry *d_find_alias(struct inode *inode) struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); de = __d_find_alias(inode, 0); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } return de; } @@ -593,7 +563,6 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); @@ -602,14 +571,12 @@ restart: __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -625,17 +592,14 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry, parent); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * Prune ancestors. */ while (dentry) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); again: spin_lock(&dentry->d_lock); @@ -653,7 +617,6 @@ again: spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; } @@ -702,8 +665,7 @@ relock: spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); - /* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_lock); + /* dcache_inode_lock and dentry->d_lock dropped */ spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } @@ -725,7 +687,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); @@ -766,7 +727,6 @@ relock: list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } /** @@ -788,7 +748,6 @@ static void prune_dcache(int count) if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); if (count >= unused) prune_ratio = 1; else @@ -825,11 +784,9 @@ static void prune_dcache(int count) if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -845,7 +802,6 @@ static void prune_dcache(int count) if (p) __put_super(p); spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -859,7 +815,6 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -868,7 +823,6 @@ void shrink_dcache_sb(struct super_block *sb) } spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -885,12 +839,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -899,7 +851,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { @@ -910,7 +861,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_unlock(&loop->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -977,8 +927,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -1029,7 +978,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; spin_lock(&this_parent->d_lock); @@ -1075,7 +1023,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1084,12 +1031,10 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 1; @@ -1121,7 +1066,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -1185,7 +1129,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1195,7 +1138,6 @@ resume: } out: spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return found; @@ -1297,7 +1239,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_parent = dget_dlock(parent); @@ -1305,7 +1246,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } this_cpu_inc(nr_dentry); @@ -1325,7 +1265,6 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -/* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1354,11 +1293,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); __d_instantiate(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1422,11 +1359,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); result = __d_instantiate_unique(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1515,12 +1450,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); res = __d_find_alias(inode, 0); if (res) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(tmp); goto out_iput; } @@ -1538,7 +1472,6 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_unlock(&tmp->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1568,21 +1501,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taking dcache_inode_lock, so d_add() by hand */ __d_instantiate(dentry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1655,12 +1585,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; } @@ -1672,7 +1600,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, new = list_entry(inode->i_dentry.next, struct dentry, d_alias); dget_locked(new); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1843,7 +1770,6 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) { struct dentry *child; - spin_lock(&dcache_lock); spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { @@ -1851,12 +1777,10 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 1; } } spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 0; } @@ -1889,7 +1813,6 @@ void d_delete(struct dentry * dentry) /* * Are we the only user? */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); @@ -1905,7 +1828,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } @@ -1932,13 +1854,11 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); spin_lock(&dcache_hash_lock); _d_rehash(entry); spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_rehash); @@ -1961,11 +1881,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(dentry_update_name_case); @@ -2058,14 +1976,14 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * The hash value has to match the hash queue that the dentry is on.. */ /* - * d_move_locked - move a dentry + * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. */ -static void d_move_locked(struct dentry * dentry, struct dentry * target) +void d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2114,22 +2032,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); } - -/** - * d_move - move a dentry - * @dentry: entry to move - * @target: new dentry - * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. - */ - -void d_move(struct dentry * dentry, struct dentry * target) -{ - spin_lock(&dcache_lock); - d_move_locked(dentry, target); - spin_unlock(&dcache_lock); -} EXPORT_SYMBOL(d_move); /** @@ -2155,13 +2057,12 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) __releases(dcache_inode_lock) { struct mutex *m1 = NULL, *m2 = NULL; @@ -2185,11 +2086,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move_locked(alias, dentry); + d_move(alias, dentry); ret = alias; out_err: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2249,7 +2149,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!inode) { @@ -2295,7 +2194,6 @@ found: spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -2307,7 +2205,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); @@ -2421,11 +2318,9 @@ char *__d_path(const struct path *path, struct path *root, int error; prepend(&res, &buflen, "\0", 1); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) return ERR_PTR(error); @@ -2487,14 +2382,12 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); path_put(&root); return res; } @@ -2520,14 +2413,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); path_put(&root); if (error) res = ERR_PTR(error); @@ -2594,11 +2485,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) { char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); return retval; } @@ -2609,7 +2498,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *p = NULL; char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; @@ -2619,12 +2507,10 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) } retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; Elong: - spin_unlock(&dcache_lock); return ERR_PTR(-ENAMETOOLONG); } @@ -2658,7 +2544,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2669,7 +2554,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) goto out; @@ -2690,7 +2574,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); } out: @@ -2776,7 +2659,6 @@ void d_genocide(struct dentry *root) rename_retry: this_parent = root; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -2823,7 +2705,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -2832,7 +2713,6 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 84b8c460a781..53a5c08fb63c 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -47,24 +47,20 @@ find_acceptable_alias(struct dentry *result, if (acceptable(context, result)) return result; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { dget_locked(dentry); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); toput = dentry; } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (toput) dput(toput); diff --git a/fs/libfs.c b/fs/libfs.c index cc4794914b52..28b36663c44e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -100,7 +100,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ list_del(&cursor->d_u.d_child); @@ -116,7 +115,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) } list_add_tail(&cursor->d_u.d_child, p); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -159,7 +157,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) i++; /* fallthrough */ default: - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); @@ -175,13 +172,11 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) spin_unlock(&next->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) return 0; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ @@ -191,7 +186,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) filp->f_pos++; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } return 0; } @@ -285,7 +279,6 @@ int simple_empty(struct dentry *dentry) struct dentry *child; int ret = 0; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); @@ -298,7 +291,6 @@ int simple_empty(struct dentry *dentry) ret = 1; out: spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return ret; } diff --git a/fs/namei.c b/fs/namei.c index cbfa5fb31072..5642bc2be418 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -612,8 +612,8 @@ int follow_up(struct path *path) return 1; } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c +/* + * serialization is taken care of in namespace.c */ static int __follow_mount(struct path *path) { @@ -645,9 +645,6 @@ static void follow_mount(struct path *path) } } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c - */ int follow_down(struct path *path) { struct vfsmount *mounted; @@ -2131,12 +2128,10 @@ void dentry_unhash(struct dentry *dentry) { dget(dentry); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 102278ed38bd..de15c533311c 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -391,7 +391,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -402,13 +401,11 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) else dent = NULL; spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); goto out; } next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return NULL; out: diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index c4b718ff9a6b..1220df75ff22 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -193,7 +193,6 @@ ncp_renew_dentries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -207,7 +206,6 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } static inline void @@ -217,7 +215,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -227,7 +224,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } struct ncp_cache_head { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 12de824edb5c..eb77471b8823 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1718,11 +1718,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1733,7 +1731,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) need_rehash = 1; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 850f67d5f0ac..b3e36c3430de 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -63,13 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } return 0; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 78c0ebb0b07c..74aaf3963c10 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -60,7 +60,6 @@ rename_retry: seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -71,7 +70,6 @@ rename_retry: *--end = '/'; dentry = dentry->d_parent; } - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; @@ -91,7 +89,6 @@ rename_retry: memcpy(end, base, namelen); return end; Elong_unlock: - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ae769fc9b66c..9be6ec1f36d8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -59,7 +59,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ @@ -84,7 +83,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_unlock(&alias->d_lock); } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } /* Notify this dentry's parent about a child's events. */ diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index c31b5c647ac7..b7de749bdd12 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -169,7 +169,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); @@ -189,7 +188,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c963ebada922..a2ceb94b0e38 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,7 +183,6 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 extern spinlock_t dcache_inode_lock; -extern spinlock_t dcache_lock; extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) @@ -296,8 +295,8 @@ extern char *dentry_path(struct dentry *, char *, int); * destroyed when it has references. dget() should never be * called for dentries with zero reference counter. For these cases * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold dcache_lock - * and call dget_locked() instead of dget(). + * needs and they take necessary precautions) you should hold d_lock + * and call dget_dlock() instead of dget(). */ static inline struct dentry *dget_dlock(struct dentry *dentry) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 090f0eacde29..296cf2fde945 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1378,7 +1378,7 @@ struct super_block { #else struct list_head s_files; #endif - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ + /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -2446,6 +2446,10 @@ static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? + */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b10bcdeaef76..2a53f10712b3 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,7 +17,6 @@ /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask) /* * fsnotify_d_move - dentry has been moved - * Called with dcache_lock and dentry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *dentry) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7380763595d3..69ad89b50489 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; - assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); + /* + * Serialisation of setting PARENT_WATCHED on the dentries is provided + * by d_lock. If inotify_inode_watched changes after we have taken + * d_lock, the following __fsnotify_update_child_dentry_flags call will + * find our entry, so it will spin until we complete here, and update + * us with the new state. + */ parent = dentry->d_parent; if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; - assert_spin_locked(&dcache_lock); - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); diff --git a/include/linux/namei.h b/include/linux/namei.h index 05b441d93642..aec730b53935 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -41,7 +41,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - require a directory * - ending slashes ok even for nonexistent files * - internal "there are more path components" flag - * - locked when lookup done with dcache_lock held * - dentry cache is untrusted; force a real lookup */ #define LOOKUP_FOLLOW 1 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b4705b51d4a..1864cb6a6a59 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -876,7 +876,6 @@ static void cgroup_clear_directory(struct dentry *dentry) struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { @@ -891,18 +890,15 @@ static void cgroup_clear_directory(struct dentry *dentry) dget_locked_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } else spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } /* @@ -914,14 +910,12 @@ static void cgroup_d_remove_dir(struct dentry *dentry) cgroup_clear_directory(dentry); - spin_lock(&dcache_lock); parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); remove_dir(dentry); } diff --git a/mm/filemap.c b/mm/filemap.c index 6b9aee20f242..ca389394fa2a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -102,9 +102,6 @@ * ->inode_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * - * ->task->proc_lock - * ->dcache_lock (proc_pid_lookup) - * * (code doesn't rely on that order, so you could switch it around) * ->tasklist_lock (memory_failure, collect_procs_ao) * ->i_mmap_lock diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 017ec096446e..2285d693f296 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1145,7 +1145,6 @@ static void sel_remove_entries(struct dentry *de) { struct list_head *node; - spin_lock(&dcache_lock); spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { @@ -1158,11 +1157,9 @@ static void sel_remove_entries(struct dentry *de) dget_locked_dlock(d); spin_unlock(&de->d_lock); spin_unlock(&d->d_lock); - spin_unlock(&dcache_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); - spin_lock(&dcache_lock); spin_lock(&de->d_lock); } else spin_unlock(&d->d_lock); @@ -1170,7 +1167,6 @@ static void sel_remove_entries(struct dentry *de) } spin_unlock(&de->d_lock); - spin_unlock(&dcache_lock); } #define BOOL_DIR_NAME "booleans" -- cgit v1.2.3 From 58db63d086790eec2ed433f9d8c4962239809cf8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:39 +1100 Subject: fs: dcache avoid starvation in dcache multi-step operations Long lived dcache "multi-step" operations which retry on rename seq can be starved with a lot of rename activity. If they fail after the 1st pass, take the rename_lock for writing to avoid further starvation. Signed-off-by: Nick Piggin --- fs/dcache.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 0dbae053b664..bf6294a20f0e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -973,10 +973,11 @@ int have_submounts(struct dentry *parent) struct dentry *this_parent; struct list_head *next; unsigned seq; + int locked = 0; -rename_retry: - this_parent = parent; seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; if (d_mountpoint(parent)) goto positive; @@ -1021,7 +1022,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -1031,13 +1032,22 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ positive: - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 1; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } EXPORT_SYMBOL(have_submounts); @@ -1061,11 +1071,11 @@ static int select_parent(struct dentry * parent) struct list_head *next; unsigned seq; int found = 0; + int locked = 0; -rename_retry: - this_parent = parent; seq = read_seqbegin(&rename_lock); - +again: + this_parent = parent; spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -1127,7 +1137,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -1138,9 +1148,18 @@ resume: } out: spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return found; + +rename_retry: + if (found) + return found; + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** @@ -2655,10 +2674,11 @@ void d_genocide(struct dentry *root) struct dentry *this_parent; struct list_head *next; unsigned seq; + int locked = 0; -rename_retry: - this_parent = root; seq = read_seqbegin(&rename_lock); +again: + this_parent = root; spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -2703,7 +2723,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -2713,8 +2733,16 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); + return; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** -- cgit v1.2.3 From 61f3dee4af09528997a970280da240577bf60721 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:40 +1100 Subject: fs: dcache reduce dput locking It is possible to run dput without taking data structure locks up-front. In many cases where we don't kill the dentry anyway, these locks are not required. Signed-off-by: Nick Piggin --- fs/dcache.c | 52 +++++++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index bf6294a20f0e..98a05696593e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -323,35 +323,16 @@ repeat: if (dentry->d_count == 1) might_sleep(); spin_lock(&dentry->d_lock); - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_inode_lock)) { -drop2: - spin_unlock(&dentry->d_lock); - goto repeat; - } - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto drop2; - } - } - dentry->d_count--; - if (dentry->d_count) { + BUG_ON(!dentry->d_count); + if (dentry->d_count > 1) { + dentry->d_count--; spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); return; } - /* - * AV: ->d_delete() is _NOT_ allowed to block now. - */ if (dentry->d_op && dentry->d_op->d_delete) { if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + goto kill_it; } /* Unreachable? Get rid of it */ @@ -362,17 +343,30 @@ drop2: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - spin_unlock(&dcache_inode_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; -unhash_it: - __d_drop(dentry); kill_it: + if (!spin_trylock(&dcache_inode_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto repeat; + } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto relock; + } + dentry->d_count--; /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); + /* if it was on the hash (d_delete case), then remove it */ + __d_drop(dentry); dentry = d_kill(dentry, parent); if (dentry) goto repeat; -- cgit v1.2.3 From 89ad485f01fd83c47f17a128db3bd7b89c0f244f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:41 +1100 Subject: fs: dcache reduce locking in d_alloc Signed-off-by: Nick Piggin --- fs/dcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 98a05696593e..ccdc5c2512df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1253,11 +1253,13 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) if (parent) { spin_lock(&parent->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + /* + * don't need child lock because it is not subject + * to concurrency here + */ dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); } -- cgit v1.2.3 From 357f8e658bba8a085c4a5d4331e30894be8096b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:42 +1100 Subject: fs: dcache reduce dcache_inode_lock dcache_inode_lock can be avoided in d_delete() and d_materialise_unique() in cases where it is not required. Signed-off-by: Nick Piggin --- fs/dcache.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index ccdc5c2512df..01f016799fd4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1828,10 +1828,15 @@ void d_delete(struct dentry * dentry) /* * Are we the only user? */ - spin_lock(&dcache_inode_lock); +again: spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); if (dentry->d_count == 1) { + if (!spin_trylock(&dcache_inode_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto again; + } dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); @@ -1842,7 +1847,6 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); fsnotify_nameremove(dentry, isdir); } @@ -2164,14 +2168,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_inode_lock); - if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - goto found_lock; + d_rehash(actual); + goto out_nolock; } + spin_lock(&dcache_inode_lock); + if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -2198,10 +2203,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else if (unlikely(!d_unhashed(actual))) - goto shouldnt_be_hashed; + else + BUG_ON(!d_unhashed(actual)); -found_lock: spin_lock(&actual->d_lock); found: spin_lock(&dcache_hash_lock); @@ -2217,10 +2221,6 @@ out_nolock: iput(inode); return actual; - -shouldnt_be_hashed: - spin_unlock(&dcache_inode_lock); - BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); -- cgit v1.2.3 From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:43 +1100 Subject: fs: dcache rationalise dget variants dget_locked was a shortcut to avoid the lazy lru manipulation when we already held dcache_lock (lru manipulation was relatively cheap at that point). However, how that the lru lock is an innermost one, we never hold it at any caller, so the lock cost can now be avoided. We already have well working lazy dcache LRU, so it should be fine to defer LRU manipulations to scan time. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- drivers/staging/smbfs/cache.c | 2 +- fs/configfs/inode.c | 2 +- fs/dcache.c | 36 ++++++++++--------------------- fs/exportfs/expfs.c | 2 +- fs/ncpfs/dir.c | 2 +- fs/ocfs2/dcache.c | 2 +- include/linux/dcache.h | 15 +++---------- kernel/cgroup.c | 2 +- security/selinux/selinuxfs.c | 2 +- 12 files changed, 24 insertions(+), 47 deletions(-) (limited to 'fs/dcache.c') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 2662b50ea8d4..03185de7cd4a 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -161,7 +161,7 @@ static void spufs_prune_dir(struct dentry *dir) list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked_dlock(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 925e88227ded..31ae1b108aea 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -279,7 +279,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 49af4a6538ba..df7fa251dcdc 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 75dfd403fb90..f2a1323ca827 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -102,7 +102,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; goto out_unlock; diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index fb3a55fff82a..c83f4768eeaa 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -252,7 +252,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) if (dentry) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked_dlock(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(parent->d_inode, dentry); diff --git a/fs/dcache.c b/fs/dcache.c index 01f016799fd4..b4d2e28eef5b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -429,32 +429,17 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ -static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) +static inline void __dget_dlock(struct dentry *dentry) { dentry->d_count++; - dentry_lru_del(dentry); - return dentry; } -/* This must be called with d_lock held */ -static inline struct dentry * __dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { spin_lock(&dentry->d_lock); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); - return dentry; -} - -struct dentry * dget_locked_dlock(struct dentry *dentry) -{ - return __dget_locked_dlock(dentry); -} - -struct dentry * dget_locked(struct dentry *dentry) -{ - return __dget_locked(dentry); } -EXPORT_SYMBOL(dget_locked); struct dentry *dget_parent(struct dentry *dentry) { @@ -512,7 +497,7 @@ again: (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; } else if (!want_discon) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -525,7 +510,7 @@ again: if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -561,7 +546,7 @@ restart: list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { - __dget_locked_dlock(dentry); + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); @@ -1257,7 +1242,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) * don't need child lock because it is not subject * to concurrency here */ - dentry->d_parent = dget_dlock(parent); + __dget_dlock(parent); + dentry->d_parent = parent; dentry->d_sb = parent->d_sb; list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); @@ -1360,7 +1346,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (memcmp(qstr->name, name, len)) continue; - dget_locked(alias); + __dget(alias); return alias; } @@ -1613,7 +1599,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); + __dget(new); spin_unlock(&dcache_inode_lock); security_d_instantiate(found, inode); d_move(new, found); @@ -1789,7 +1775,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); return 1; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 53a5c08fb63c..f06a940065f6 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -49,7 +49,7 @@ find_acceptable_alias(struct dentry *result, spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { - dget_locked(dentry); + dget(dentry); spin_unlock(&dcache_inode_lock); if (toput) dput(toput); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index de15c533311c..0ba3cdc95a44 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -397,7 +397,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; spin_unlock(&parent->d_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b7de749bdd12..4d54c60ceee4 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -178,7 +178,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked_dlock(dentry); + dget_dlock(dentry); spin_unlock(&dentry->d_lock); break; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a2ceb94b0e38..ca648685f0cc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -287,23 +287,17 @@ extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_locked - get a reference to a dentry + * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. dget() should never be - * called for dentries with zero reference counter. For these cases - * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold d_lock - * and call dget_dlock() instead of dget(). + * destroyed when it has references. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { - if (dentry) { - BUG_ON(!dentry->d_count); + if (dentry) dentry->d_count++; - } return dentry; } @@ -317,9 +311,6 @@ static inline struct dentry *dget(struct dentry *dentry) return dentry; } -extern struct dentry * dget_locked(struct dentry *); -extern struct dentry * dget_locked_dlock(struct dentry *); - extern struct dentry *dget_parent(struct dentry *dentry); /** diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1864cb6a6a59..9f41470c3949 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -887,7 +887,7 @@ static void cgroup_clear_directory(struct dentry *dentry) /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - dget_locked_dlock(d); + dget_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); d_delete(d); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2285d693f296..43deac219491 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1154,7 +1154,7 @@ static void sel_remove_entries(struct dentry *de) list_del_init(node); if (d->d_inode) { - dget_locked_dlock(d); + dget_dlock(d); spin_unlock(&de->d_lock); spin_unlock(&d->d_lock); d_delete(d); -- cgit v1.2.3 From a734eb458ab2bd11479a27dd54f48e1b26a55845 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:44 +1100 Subject: fs: dcache reduce d_parent locking Use RCU to simplify locking in dget_parent. Signed-off-by: Nick Piggin --- fs/dcache.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b4d2e28eef5b..a8f89765d602 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -446,24 +446,27 @@ struct dentry *dget_parent(struct dentry *dentry) struct dentry *ret; repeat: - spin_lock(&dentry->d_lock); + /* + * Don't need rcu_dereference because we re-check it was correct under + * the lock. + */ + rcu_read_lock(); ret = dentry->d_parent; - if (!ret) - goto out; - if (dentry == ret) { - ret->d_count++; + if (!ret) { + rcu_read_unlock(); goto out; } - if (!spin_trylock(&ret->d_lock)) { - spin_unlock(&dentry->d_lock); - cpu_relax(); + spin_lock(&ret->d_lock); + if (unlikely(ret != dentry->d_parent)) { + spin_unlock(&ret->d_lock); + rcu_read_unlock(); goto repeat; } + rcu_read_unlock(); BUG_ON(!ret->d_count); ret->d_count++; spin_unlock(&ret->d_lock); out: - spin_unlock(&dentry->d_lock); return ret; } EXPORT_SYMBOL(dget_parent); -- cgit v1.2.3 From 89e6054836a7b1e7500cd70a14b5579e752c9250 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:45 +1100 Subject: fs: dcache reduce prune_one_dentry locking prune_one_dentry can avoid quite a bit of locking in the common case where ancestors have an elevated refcount. Alternatively, we could have gone the other way and made fewer trylocks in the case where d_count goes to zero, but is probably less common. Signed-off-by: Nick Piggin --- fs/dcache.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a8f89765d602..195706374697 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -582,26 +582,29 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) * Prune ancestors. */ while (dentry) { - spin_lock(&dcache_inode_lock); -again: +relock: spin_lock(&dentry->d_lock); + if (dentry->d_count > 1) { + dentry->d_count--; + spin_unlock(&dentry->d_lock); + return; + } + if (!spin_trylock(&dcache_inode_lock)) { +relock2: + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto relock; + } + if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } - dentry->d_count--; - if (dentry->d_count) { - if (parent) - spin_unlock(&parent->d_lock); - spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - return; + goto relock2; } - + dentry->d_count--; dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry, parent); -- cgit v1.2.3 From be182bff72fae6a3eb25624b39170c40b72f0909 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:46 +1100 Subject: fs: reduce dcache_inode_lock width in lru scanning Signed-off-by: Nick Piggin --- fs/dcache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 195706374697..fe8f28a2878e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -621,7 +621,7 @@ static void shrink_dentry_list(struct list_head *list) dentry = list_entry(list->prev, struct dentry, d_lru); if (!spin_trylock(&dentry->d_lock)) { -relock: +relock1: spin_unlock(&dcache_lru_lock); cpu_relax(); spin_lock(&dcache_lru_lock); @@ -638,20 +638,24 @@ relock: spin_unlock(&dentry->d_lock); continue; } + if (!spin_trylock(&dcache_inode_lock)) { +relock2: + spin_unlock(&dentry->d_lock); + goto relock1; + } if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto relock; + spin_unlock(&dcache_inode_lock); + goto relock2; } __dentry_lru_del(dentry); spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); /* dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } } @@ -672,7 +676,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -711,7 +714,6 @@ relock: if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); - spin_unlock(&dcache_inode_lock); } /** @@ -800,14 +802,12 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } spin_unlock(&dcache_lru_lock); - spin_unlock(&dcache_inode_lock); } EXPORT_SYMBOL(shrink_dcache_sb); -- cgit v1.2.3 From ec33679d78f9d653a44ddba10b5fb824c06330a1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:47 +1100 Subject: fs: use RCU in shrink_dentry_list to reduce lock nesting Signed-off-by: Nick Piggin --- fs/dcache.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index fe8f28a2878e..d1840b30c673 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -615,16 +615,16 @@ static void shrink_dentry_list(struct list_head *list) { struct dentry *dentry; - while (!list_empty(list)) { + rcu_read_lock(); + for (;;) { struct dentry *parent; - dentry = list_entry(list->prev, struct dentry, d_lru); - - if (!spin_trylock(&dentry->d_lock)) { -relock1: - spin_unlock(&dcache_lru_lock); - cpu_relax(); - spin_lock(&dcache_lru_lock); + dentry = list_entry_rcu(list->prev, struct dentry, d_lru); + if (&dentry->d_lru == list) + break; /* empty */ + spin_lock(&dentry->d_lock); + if (dentry != list_entry(list->prev, struct dentry, d_lru)) { + spin_unlock(&dentry->d_lock); continue; } @@ -634,14 +634,16 @@ relock1: * it - just keep it off the LRU list. */ if (dentry->d_count) { - __dentry_lru_del(dentry); + dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } + if (!spin_trylock(&dcache_inode_lock)) { -relock2: +relock: spin_unlock(&dentry->d_lock); - goto relock1; + cpu_relax(); + continue; } if (IS_ROOT(dentry)) parent = NULL; @@ -649,15 +651,15 @@ relock2: parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { spin_unlock(&dcache_inode_lock); - goto relock2; + goto relock; } - __dentry_lru_del(dentry); - spin_unlock(&dcache_lru_lock); + dentry_lru_del(dentry); + rcu_read_unlock(); prune_one_dentry(dentry, parent); - /* dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_lru_lock); + rcu_read_lock(); } + rcu_read_unlock(); } /** @@ -705,15 +707,15 @@ relock: if (!--cnt) break; } - /* XXX: re-add cond_resched_lock when dcache_lock goes away */ + cond_resched_lock(&dcache_lru_lock); } - - *count = cnt; - shrink_dentry_list(&tmp); - if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); + + shrink_dentry_list(&tmp); + + *count = cnt; } /** @@ -805,7 +807,9 @@ void shrink_dcache_sb(struct super_block *sb) spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); + spin_unlock(&dcache_lru_lock); shrink_dentry_list(&tmp); + spin_lock(&dcache_lru_lock); } spin_unlock(&dcache_lru_lock); } -- cgit v1.2.3 From 77812a1ef139d84270d27faacc0630c887411013 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:48 +1100 Subject: fs: consolidate dentry kill sequence The tricky locking for disposing of a dentry is duplicated 3 times in the dcache (dput, pruning a dentry from the LRU, and pruning its ancestors). Consolidate them all into a single function dentry_kill. Signed-off-by: Nick Piggin --- fs/dcache.c | 135 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 61 insertions(+), 74 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d1840b30c673..dc0551c9755d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -284,6 +284,40 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) + __releases(dentry->d_lock) +{ + struct dentry *parent; + + if (!spin_trylock(&dcache_inode_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ + } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto relock; + } + if (ref) + dentry->d_count--; + /* if dentry was on the d_lru list delete it from there */ + dentry_lru_del(dentry); + /* if it was on the hash then remove it */ + __d_drop(dentry); + return d_kill(dentry, parent); +} + /* * This is dput * @@ -309,13 +343,9 @@ EXPORT_SYMBOL(d_drop); * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. - * - * no dcache lock, please. */ - void dput(struct dentry *dentry) { - struct dentry *parent; if (!dentry) return; @@ -348,26 +378,7 @@ repeat: return; kill_it: - if (!spin_trylock(&dcache_inode_lock)) { -relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto repeat; - } - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock; - } - dentry->d_count--; - /* if dentry was on the d_lru list delete it from there */ - dentry_lru_del(dentry); - /* if it was on the hash (d_delete case), then remove it */ - __d_drop(dentry); - dentry = d_kill(dentry, parent); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -563,51 +574,43 @@ restart: EXPORT_SYMBOL(d_prune_aliases); /* - * Throw away a dentry - free the inode, dput the parent. This requires that - * the LRU list has already been removed. + * Try to throw away a dentry - free the inode, dput the parent. + * Requires dentry->d_lock is held, and dentry->d_count == 0. + * Releases dentry->d_lock. * - * Try to prune ancestors as well. This is necessary to prevent - * quadratic behavior of shrink_dcache_parent(), but is also expected - * to be beneficial in reducing dentry cache fragmentation. + * This may fail if locks cannot be acquired no problem, just try again. */ -static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) +static void try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dcache_inode_lock) { - __d_drop(dentry); - dentry = d_kill(dentry, parent); + struct dentry *parent; + parent = dentry_kill(dentry, 0); /* - * Prune ancestors. + * If dentry_kill returns NULL, we have nothing more to do. + * if it returns the same dentry, trylocks failed. In either + * case, just loop again. + * + * Otherwise, we need to prune ancestors too. This is necessary + * to prevent quadratic behavior of shrink_dcache_parent(), but + * is also expected to be beneficial in reducing dentry cache + * fragmentation. */ + if (!parent) + return; + if (parent == dentry) + return; + + /* Prune ancestors. */ + dentry = parent; while (dentry) { -relock: spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { dentry->d_count--; spin_unlock(&dentry->d_lock); return; } - if (!spin_trylock(&dcache_inode_lock)) { -relock2: - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto relock; - } - - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock2; - } - dentry->d_count--; - dentry_lru_del(dentry); - __d_drop(dentry); - dentry = d_kill(dentry, parent); + dentry = dentry_kill(dentry, 1); } } @@ -617,8 +620,6 @@ static void shrink_dentry_list(struct list_head *list) rcu_read_lock(); for (;;) { - struct dentry *parent; - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); if (&dentry->d_lru == list) break; /* empty */ @@ -639,24 +640,10 @@ static void shrink_dentry_list(struct list_head *list) continue; } - if (!spin_trylock(&dcache_inode_lock)) { -relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); - continue; - } - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock; - } - dentry_lru_del(dentry); - rcu_read_unlock(); - prune_one_dentry(dentry, parent); + + try_prune_one_dentry(dentry); + rcu_read_lock(); } rcu_read_unlock(); -- cgit v1.2.3 From 31e6b01f4183ff419a6d1f86177cbf4662347cec Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:52 +1100 Subject: fs: rcu-walk for path lookup Perform common cases of path lookups without any stores or locking in the ancestor dentry elements. This is called rcu-walk, as opposed to the current algorithm which is a refcount based walk, or ref-walk. This results in far fewer atomic operations on every path element, significantly improving path lookup performance. It also avoids cacheline bouncing on common dentries, significantly improving scalability. The overall design is like this: * LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. * Take the RCU lock for the entire path walk, starting with the acquiring of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are not required for dentry persistence. * synchronize_rcu is called when unregistering a filesystem, so we can access d_ops and i_ops during rcu-walk. * Similarly take the vfsmount lock for the entire path walk. So now mnt refcounts are not required for persistence. Also we are free to perform mount lookups, and to assume dentry mount points and mount roots are stable up and down the path. * Have a per-dentry seqlock to protect the dentry name, parent, and inode, so we can load this tuple atomically, and also check whether any of its members have changed. * Dentry lookups (based on parent, candidate string tuple) recheck the parent sequence after the child is found in case anything changed in the parent during the path walk. * inode is also RCU protected so we can load d_inode and use the inode for limited things. * i_mode, i_uid, i_gid can be tested for exec permissions during path walk. * i_op can be loaded. When we reach the destination dentry, we lock it, recheck lookup sequence, and increment its refcount and mountpoint refcount. RCU and vfsmount locks are dropped. This is termed "dropping rcu-walk". If the dentry refcount does not match, we can not drop rcu-walk gracefully at the current point in the lokup, so instead return -ECHILD (for want of a better errno). This signals the path walking code to re-do the entire lookup with a ref-walk. Aside from the final dentry, there are other situations that may be encounted where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take a reference on the last good dentry) and continue with a ref-walk. Again, if we can drop rcu-walk gracefully, we return -ECHILD and do the whole lookup using ref-walk. But it is very important that we can continue with ref-walk for most cases, particularly to avoid the overhead of double lookups, and to gain the scalability advantages on common path elements (like cwd and root). The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs * dentries with d_revalidate * Following links In future patches, permission checks and d_revalidate become rcu-walk aware. It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. Signed-off-by: Nick Piggin --- Documentation/filesystems/dentry-locking.txt | 172 ------- Documentation/filesystems/path-lookup.txt | 345 +++++++++++++ fs/dcache.c | 203 +++++++- fs/filesystems.c | 3 + fs/namei.c | 743 ++++++++++++++++++++++----- fs/proc/proc_sysctl.c | 4 + include/linux/dcache.h | 32 +- include/linux/namei.h | 15 +- include/linux/security.h | 8 +- security/security.c | 9 + 10 files changed, 1194 insertions(+), 340 deletions(-) delete mode 100644 Documentation/filesystems/dentry-locking.txt create mode 100644 Documentation/filesystems/path-lookup.txt (limited to 'fs/dcache.c') diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt deleted file mode 100644 index 30b6a40f5650..000000000000 --- a/Documentation/filesystems/dentry-locking.txt +++ /dev/null @@ -1,172 +0,0 @@ -RCU-based dcache locking model -============================== - -On many workloads, the most common operation on dcache is to look up a -dentry, given a parent dentry and the name of the child. Typically, -for every open(), stat() etc., the dentry corresponding to the -pathname will be looked up by walking the tree starting with the first -component of the pathname and using that dentry along with the next -component to look up the next level and so on. Since it is a frequent -operation for workloads like multiuser environments and web servers, -it is important to optimize this path. - -Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in -every component during path look-up. Since 2.5.10 onwards, fast-walk -algorithm changed this by holding the dcache_lock at the beginning and -walking as many cached path component dentries as possible. This -significantly decreases the number of acquisition of -dcache_lock. However it also increases the lock hold time -significantly and affects performance in large SMP machines. Since -2.5.62 kernel, dcache has been using a new locking model that uses RCU -to make dcache look-up lock-free. - -The current dcache locking model is not very different from the -existing dcache locking model. Prior to 2.5.62 kernel, dcache_lock -protected the hash chain, d_child, d_alias, d_lru lists as well as -d_inode and several other things like mount look-up. RCU-based changes -affect only the way the hash chain is protected. For everything else -the dcache_lock must be taken for both traversing as well as -updating. The hash chain updates too take the dcache_lock. The -significant change is the way d_lookup traverses the hash chain, it -doesn't acquire the dcache_lock for this and rely on RCU to ensure -that the dentry has not been *freed*. - -dcache_lock no longer exists, dentry locking is explained in fs/dcache.c - -Dcache locking details -====================== - -For many multi-user workloads, open() and stat() on files are very -frequently occurring operations. Both involve walking of path names to -find the dentry corresponding to the concerned file. In 2.4 kernel, -dcache_lock was held during look-up of each path component. Contention -and cache-line bouncing of this global lock caused significant -scalability problems. With the introduction of RCU in Linux kernel, -this was worked around by making the look-up of path components during -path walking lock-free. - - -Safe lock-free look-up of dcache hash table -=========================================== - -Dcache is a complex data structure with the hash table entries also -linked together in other lists. In 2.4 kernel, dcache_lock protected -all the lists. RCU dentry hash walking works like this: - -1. The deletion from hash chain is done using hlist_del_rcu() macro - which doesn't initialize next pointer of the deleted dentry and - this allows us to walk safely lock-free while a deletion is - happening. This is a standard hlist_rcu iteration. - -2. Insertion of a dentry into the hash table is done using - hlist_add_head_rcu() which take care of ordering the writes - the - writes to the dentry must be visible before the dentry is - inserted. This works in conjunction with hlist_for_each_rcu(), - which has since been replaced by hlist_for_each_entry_rcu(), while - walking the hash chain. The only requirement is that all - initialization to the dentry must be done before - hlist_add_head_rcu() since we don't have lock protection - while traversing the hash chain. - -3. The dentry looked up without holding locks cannot be returned for - walking if it is unhashed. It then may have a NULL d_inode or other - bogosity since RCU doesn't protect the other fields in the dentry. We - therefore use a flag DCACHE_UNHASHED to indicate unhashed dentries - and use this in conjunction with a per-dentry lock (d_lock). Once - looked up without locks, we acquire the per-dentry lock (d_lock) and - check if the dentry is unhashed. If so, the look-up is failed. If not, - the reference count of the dentry is increased and the dentry is - returned. - -4. Once a dentry is looked up, it must be ensured during the path walk - for that component it doesn't go away. In pre-2.5.10 code, this was - done holding a reference to the dentry. dcache_rcu does the same. - In some sense, dcache_rcu path walking looks like the pre-2.5.10 - version. - -5. All dentry hash chain updates must take the per-dentry lock (see - fs/dcache.c). This excludes dput() to ensure that a dentry that has - been looked up concurrently does not get deleted before dget() can - take a ref. - -6. There are several ways to do reference counting of RCU protected - objects. One such example is in ipv4 route cache where deferred - freeing (using call_rcu()) is done as soon as the reference count - goes to zero. This cannot be done in the case of dentries because - tearing down of dentries require blocking (dentry_iput()) which - isn't supported from RCU callbacks. Instead, tearing down of - dentries happen synchronously in dput(), but actual freeing happens - later when RCU grace period is over. This allows safe lock-free - walking of the hash chains, but a matched dentry may have been - partially torn down. The checking of DCACHE_UNHASHED flag with - d_lock held detects such dentries and prevents them from being - returned from look-up. - - -Maintaining POSIX rename semantics -================================== - -Since look-up of dentries is lock-free, it can race against a -concurrent rename operation. For example, during rename of file A to -B, look-up of either A or B must succeed. So, if look-up of B happens -after A has been removed from the hash chain but not added to the new -hash chain, it may fail. Also, a comparison while the name is being -written concurrently by a rename may result in false positive matches -violating rename semantics. Issues related to race with rename are -handled as described below : - -1. Look-up can be done in two ways - d_lookup() which is safe from - simultaneous renames and __d_lookup() which is not. If - __d_lookup() fails, it must be followed up by a d_lookup() to - correctly determine whether a dentry is in the hash table or - not. d_lookup() protects look-ups using a sequence lock - (rename_lock). - -2. The name associated with a dentry (d_name) may be changed if a - rename is allowed to happen simultaneously. To avoid memcmp() in - __d_lookup() go out of bounds due to a rename and false positive - comparison, the name comparison is done while holding the - per-dentry lock. This prevents concurrent renames during this - operation. - -3. Hash table walking during look-up may move to a different bucket as - the current dentry is moved to a different bucket due to rename. - But we use hlists in dcache hash table and they are - null-terminated. So, even if a dentry moves to a different bucket, - hash chain walk will terminate. [with a list_head list, it may not - since termination is when the list_head in the original bucket is - reached]. Since we redo the d_parent check and compare name while - holding d_lock, lock-free look-up will not race against d_move(). - -4. There can be a theoretical race when a dentry keeps coming back to - original bucket due to double moves. Due to this look-up may - consider that it has never moved and can end up in a infinite loop. - But this is not any worse that theoretical livelocks we already - have in the kernel. - - -Important guidelines for filesystem developers related to dcache_rcu -==================================================================== - -1. Existing dcache interfaces (pre-2.5.62) exported to filesystem - don't change. Only dcache internal implementation changes. However - filesystems *must not* delete from the dentry hash chains directly - using the list macros like allowed earlier. They must use dcache - APIs like d_drop() or __d_drop() depending on the situation. - -2. d_flags is now protected by a per-dentry lock (d_lock). All access - to d_flags must be protected by it. - -3. For a hashed dentry, checking of d_count needs to be protected by - d_lock. - - -Papers and other documentation on dcache locking -================================================ - -1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124). - -2. http://lse.sourceforge.net/locking/dcache/dcache.html - - - diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt new file mode 100644 index 000000000000..09b2878724a1 --- /dev/null +++ b/Documentation/filesystems/path-lookup.txt @@ -0,0 +1,345 @@ +Path walking and name lookup locking +==================================== + +Path resolution is the finding a dentry corresponding to a path name string, by +performing a path walk. Typically, for every open(), stat() etc., the path name +will be resolved. Paths are resolved by walking the namespace tree, starting +with the first component of the pathname (eg. root or cwd) with a known dentry, +then finding the child of that dentry, which is named the next component in the +path string. Then repeating the lookup from the child dentry and finding its +child with the next element, and so on. + +Since it is a frequent operation for workloads like multiuser environments and +web servers, it is important to optimize this code. + +Path walking synchronisation history: +Prior to 2.5.10, dcache_lock was acquired in d_lookup (dcache hash lookup) and +thus in every component during path look-up. Since 2.5.10 onwards, fast-walk +algorithm changed this by holding the dcache_lock at the beginning and walking +as many cached path component dentries as possible. This significantly +decreases the number of acquisition of dcache_lock. However it also increases +the lock hold time significantly and affects performance in large SMP machines. +Since 2.5.62 kernel, dcache has been using a new locking model that uses RCU to +make dcache look-up lock-free. + +All the above algorithms required taking a lock and reference count on the +dentry that was looked up, so that may be used as the basis for walking the +next path element. This is inefficient and unscalable. It is inefficient +because of the locks and atomic operations required for every dentry element +slows things down. It is not scalable because many parallel applications that +are path-walk intensive tend to do path lookups starting from a common dentry +(usually, the root "/" or current working directory). So contention on these +common path elements causes lock and cacheline queueing. + +Since 2.6.38, RCU is used to make a significant part of the entire path walk +(including dcache look-up) completely "store-free" (so, no locks, atomics, or +even stores into cachelines of common dentries). This is known as "rcu-walk" +path walking. + +Path walking overview +===================== + +A name string specifies a start (root directory, cwd, fd-relative) and a +sequence of elements (directory entry names), which together refer to a path in +the namespace. A path is represented as a (dentry, vfsmount) tuple. The name +elements are sub-strings, seperated by '/'. + +Name lookups will want to find a particular path that a name string refers to +(usually the final element, or parent of final element). This is done by taking +the path given by the name's starting point (which we know in advance -- eg. +current->fs->cwd or current->fs->root) as the first parent of the lookup. Then +iteratively for each subsequent name element, look up the child of the current +parent with the given name and if it is not the desired entry, make it the +parent for the next lookup. + +A parent, of course, must be a directory, and we must have appropriate +permissions on the parent inode to be able to walk into it. + +Turning the child into a parent for the next lookup requires more checks and +procedures. Symlinks essentially substitute the symlink name for the target +name in the name string, and require some recursive path walking. Mount points +must be followed into (thus changing the vfsmount that subsequent path elements +refer to), switching from the mount point path to the root of the particular +mounted vfsmount. These behaviours are variously modified depending on the +exact path walking flags. + +Path walking then must, broadly, do several particular things: +- find the start point of the walk; +- perform permissions and validity checks on inodes; +- perform dcache hash name lookups on (parent, name element) tuples; +- traverse mount points; +- traverse symlinks; +- lookup and create missing parts of the path on demand. + +Safe store-free look-up of dcache hash table +============================================ + +Dcache name lookup +------------------ +In order to lookup a dcache (parent, name) tuple, we take a hash on the tuple +and use that to select a bucket in the dcache-hash table. The list of entries +in that bucket is then walked, and we do a full comparison of each entry +against our (parent, name) tuple. + +The hash lists are RCU protected, so list walking is not serialised with +concurrent updates (insertion, deletion from the hash). This is a standard RCU +list application with the exception of renames, which will be covered below. + +Parent and name members of a dentry, as well as its membership in the dcache +hash, and its inode are protected by the per-dentry d_lock spinlock. A +reference is taken on the dentry (while the fields are verified under d_lock), +and this stabilises its d_inode pointer and actual inode. This gives a stable +point to perform the next step of our path walk against. + +These members are also protected by d_seq seqlock, although this offers +read-only protection and no durability of results, so care must be taken when +using d_seq for synchronisation (see seqcount based lookups, below). + +Renames +------- +Back to the rename case. In usual RCU protected lists, the only operations that +will happen to an object is insertion, and then eventually removal from the +list. The object will not be reused until an RCU grace period is complete. +This ensures the RCU list traversal primitives can run over the object without +problems (see RCU documentation for how this works). + +However when a dentry is renamed, its hash value can change, requiring it to be +moved to a new hash list. Allocating and inserting a new alias would be +expensive and also problematic for directory dentries. Latency would be far to +high to wait for a grace period after removing the dentry and before inserting +it in the new hash bucket. So what is done is to insert the dentry into the +new list immediately. + +However, when the dentry's list pointers are updated to point to objects in the +new list before waiting for a grace period, this can result in a concurrent RCU +lookup of the old list veering off into the new (incorrect) list and missing +the remaining dentries on the list. + +There is no fundamental problem with walking down the wrong list, because the +dentry comparisons will never match. However it is fatal to miss a matching +dentry. So a seqlock is used to detect when a rename has occurred, and so the +lookup can be retried. + + 1 2 3 + +---+ +---+ +---+ +hlist-->| N-+->| N-+->| N-+-> +head <--+-P |<-+-P |<-+-P | + +---+ +---+ +---+ + +Rename of dentry 2 may require it deleted from the above list, and inserted +into a new list. Deleting 2 gives the following list. + + 1 3 + +---+ +---+ (don't worry, the longer pointers do not +hlist-->| N-+-------->| N-+-> impose a measurable performance overhead +head <--+-P |<--------+-P | on modern CPUs) + +---+ +---+ + ^ 2 ^ + | +---+ | + | | N-+----+ + +----+-P | + +---+ + +This is a standard RCU-list deletion, which leaves the deleted object's +pointers intact, so a concurrent list walker that is currently looking at +object 2 will correctly continue to object 3 when it is time to traverse the +next object. + +However, when inserting object 2 onto a new list, we end up with this: + + 1 3 + +---+ +---+ +hlist-->| N-+-------->| N-+-> +head <--+-P |<--------+-P | + +---+ +---+ + 2 + +---+ + | N-+----> + <----+-P | + +---+ + +Because we didn't wait for a grace period, there may be a concurrent lookup +still at 2. Now when it follows 2's 'next' pointer, it will walk off into +another list without ever having checked object 3. + +A related, but distinctly different, issue is that of rename atomicity versus +lookup operations. If a file is renamed from 'A' to 'B', a lookup must only +find either 'A' or 'B'. So if a lookup of 'A' returns NULL, a subsequent lookup +of 'B' must succeed (note the reverse is not true). + +Between deleting the dentry from the old hash list, and inserting it on the new +hash list, a lookup may find neither 'A' nor 'B' matching the dentry. The same +rename seqlock is also used to cover this race in much the same way, by +retrying a negative lookup result if a rename was in progress. + +Seqcount based lookups +---------------------- +In refcount based dcache lookups, d_lock is used to serialise access to +the dentry, stabilising it while comparing its name and parent and then +taking a reference count (the reference count then gives a stable place to +start the next part of the path walk from). + +As explained above, we would like to do path walking without taking locks or +reference counts on intermediate dentries along the path. To do this, a per +dentry seqlock (d_seq) is used to take a "coherent snapshot" of what the dentry +looks like (its name, parent, and inode). That snapshot is then used to start +the next part of the path walk. When loading the coherent snapshot under d_seq, +care must be taken to load the members up-front, and use those pointers rather +than reloading from the dentry later on (otherwise we'd have interesting things +like d_inode going NULL underneath us, if the name was unlinked). + +Also important is to avoid performing any destructive operations (pretty much: +no non-atomic stores to shared data), and to recheck the seqcount when we are +"done" with the operation. Retry or abort if the seqcount does not match. +Avoiding destructive or changing operations means we can easily unwind from +failure. + +What this means is that a caller, provided they are holding RCU lock to +protect the dentry object from disappearing, can perform a seqcount based +lookup which does not increment the refcount on the dentry or write to +it in any way. This returned dentry can be used for subsequent operations, +provided that d_seq is rechecked after that operation is complete. + +Inodes are also rcu freed, so the seqcount lookup dentry's inode may also be +queried for permissions. + +With this two parts of the puzzle, we can do path lookups without taking +locks or refcounts on dentry elements. + +RCU-walk path walking design +============================ + +Path walking code now has two distinct modes, ref-walk and rcu-walk. ref-walk +is the traditional[*] way of performing dcache lookups using d_lock to +serialise concurrent modifications to the dentry and take a reference count on +it. ref-walk is simple and obvious, and may sleep, take locks, etc while path +walking is operating on each dentry. rcu-walk uses seqcount based dentry +lookups, and can perform lookup of intermediate elements without any stores to +shared data in the dentry or inode. rcu-walk can not be applied to all cases, +eg. if the filesystem must sleep or perform non trivial operations, rcu-walk +must be switched to ref-walk mode. + +[*] RCU is still used for the dentry hash lookup in ref-walk, but not the full + path walk. + +Where ref-walk uses a stable, refcounted ``parent'' to walk the remaining +path string, rcu-walk uses a d_seq protected snapshot. When looking up a +child of this parent snapshot, we open d_seq critical section on the child +before closing d_seq critical section on the parent. This gives an interlocking +ladder of snapshots to walk down. + + + proc 101 + /----------------\ + / comm: "vi" \ + / fs.root: dentry0 \ + \ fs.cwd: dentry2 / + \ / + \----------------/ + +So when vi wants to open("/home/npiggin/test.c", O_RDWR), then it will +start from current->fs->root, which is a pinned dentry. Alternatively, +"./test.c" would start from cwd; both names refer to the same path in +the context of proc101. + + dentry 0 + +---------------------+ rcu-walk begins here, we note d_seq, check the + | name: "/" | inode's permission, and then look up the next + | inode: 10 | path element which is "home"... + | children:"home", ...| + +---------------------+ + | + dentry 1 V + +---------------------+ ... which brings us here. We find dentry1 via + | name: "home" | hash lookup, then note d_seq and compare name + | inode: 678 | string and parent pointer. When we have a match, + | children:"npiggin" | we now recheck the d_seq of dentry0. Then we + +---------------------+ check inode and look up the next element. + | + dentry2 V + +---------------------+ Note: if dentry0 is now modified, lookup is + | name: "npiggin" | not necessarily invalid, so we need only keep a + | inode: 543 | parent for d_seq verification, and grandparents + | children:"a.c", ... | can be forgotten. + +---------------------+ + | + dentry3 V + +---------------------+ At this point we have our destination dentry. + | name: "a.c" | We now take its d_lock, verify d_seq of this + | inode: 14221 | dentry. If that checks out, we can increment + | children:NULL | its refcount because we're holding d_lock. + +---------------------+ + +Taking a refcount on a dentry from rcu-walk mode, by taking its d_lock, +re-checking its d_seq, and then incrementing its refcount is called +"dropping rcu" or dropping from rcu-walk into ref-walk mode. + +It is, in some sense, a bit of a house of cards. If the seqcount check of the +parent snapshot fails, the house comes down, because we had closed the d_seq +section on the grandparent, so we have nothing left to stand on. In that case, +the path walk must be fully restarted (which we do in ref-walk mode, to avoid +live locks). It is costly to have a full restart, but fortunately they are +quite rare. + +When we reach a point where sleeping is required, or a filesystem callout +requires ref-walk, then instead of restarting the walk, we attempt to drop rcu +at the last known good dentry we have. Avoiding a full restart in ref-walk in +these cases is fundamental for performance and scalability because blocking +operations such as creates and unlinks are not uncommon. + +The detailed design for rcu-walk is like this: +* LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. +* Take the RCU lock for the entire path walk, starting with the acquiring + of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are + not required for dentry persistence. +* synchronize_rcu is called when unregistering a filesystem, so we can + access d_ops and i_ops during rcu-walk. +* Similarly take the vfsmount lock for the entire path walk. So now mnt + refcounts are not required for persistence. Also we are free to perform mount + lookups, and to assume dentry mount points and mount roots are stable up and + down the path. +* Have a per-dentry seqlock to protect the dentry name, parent, and inode, + so we can load this tuple atomically, and also check whether any of its + members have changed. +* Dentry lookups (based on parent, candidate string tuple) recheck the parent + sequence after the child is found in case anything changed in the parent + during the path walk. +* inode is also RCU protected so we can load d_inode and use the inode for + limited things. +* i_mode, i_uid, i_gid can be tested for exec permissions during path walk. +* i_op can be loaded. +* When the destination dentry is reached, drop rcu there (ie. take d_lock, + verify d_seq, increment refcount). +* If seqlock verification fails anywhere along the path, do a full restart + of the path lookup in ref-walk mode. -ECHILD tends to be used (for want of + a better errno) to signal an rcu-walk failure. + +The cases where rcu-walk cannot continue are: +* NULL dentry (ie. any uncached path element) +* parent with d_inode->i_op->permission or ACLs +* dentries with d_revalidate +* Following links + +In future patches, permission checks and d_revalidate become rcu-walk aware. It +may be possible eventually to make following links rcu-walk aware. + +Uncached path elements will always require dropping to ref-walk mode, at the +very least because i_mutex needs to be grabbed, and objects allocated. + +Final note: +"store-free" path walking is not strictly store free. We take vfsmount lock +and refcounts (both of which can be made per-cpu), and we also store to the +stack (which is essentially CPU-local), and we also have to take locks and +refcount on final dentry. + +The point is that shared data, where practically possible, is not locked +or stored into. The result is massive improvements in performance and +scalability of path resolution. + + +Papers and other documentation on dcache locking +================================================ + +1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124). + +2. http://lse.sourceforge.net/locking/dcache/dcache.html diff --git a/fs/dcache.c b/fs/dcache.c index dc0551c9755d..187fea040108 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -152,9 +152,23 @@ static void d_free(struct dentry *dentry) call_rcu(&dentry->d_u.d_rcu, __d_free); } +/** + * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * After this call, in-progress rcu-walk path lookup will fail. This + * should be called after unhashing, and after changing d_inode (if + * the dentry has not already been unhashed). + */ +static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +{ + assert_spin_locked(&dentry->d_lock); + /* Go through a barrier */ + write_seqcount_barrier(&dentry->d_seq); +} + /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. + * d_iput() operation if defined. Dentry has no refcount + * and is unhashed. */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) @@ -178,6 +192,28 @@ static void dentry_iput(struct dentry * dentry) } } +/* + * Release the dentry's inode, using the filesystem + * d_iput() operation if defined. dentry remains in-use. + */ +static void dentry_unlink_inode(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_inode_lock) +{ + struct inode *inode = dentry->d_inode; + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); +} + /* * dentry_lru_(add|del|move_tail) must be called with d_lock held. */ @@ -272,6 +308,7 @@ void __d_drop(struct dentry *dentry) spin_lock(&dcache_hash_lock); hlist_del_rcu(&dentry->d_hash); spin_unlock(&dcache_hash_lock); + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -309,6 +346,7 @@ relock: spin_unlock(&dcache_inode_lock); goto relock; } + if (ref) dentry->d_count--; /* if dentry was on the d_lru list delete it from there */ @@ -1221,6 +1259,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; @@ -1269,6 +1308,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1610,6 +1650,111 @@ err_out: } EXPORT_SYMBOL(d_add_ci); +/** + * __d_lookup_rcu - search for a dentry (racy, store-free) + * @parent: parent dentry + * @name: qstr of name we wish to find + * @seq: returns d_seq value at the point where the dentry was found + * @inode: returns dentry->d_inode when the inode was found valid. + * Returns: dentry, or NULL + * + * __d_lookup_rcu is the dcache lookup function for rcu-walk name + * resolution (store-free path walking) design described in + * Documentation/filesystems/path-lookup.txt. + * + * This is not to be used outside core vfs. + * + * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock + * held, and rcu_read_lock held. The returned dentry must not be stored into + * without taking d_lock and checking d_seq sequence count against @seq + * returned here. + * + * A refcount may be taken on the found dentry with the __d_rcu_to_refcount + * function. + * + * Alternatively, __d_lookup_rcu may be called again to look up the child of + * the returned dentry, so long as its parent's seqlock is checked after the + * child is looked up. Thus, an interlocking stepping of sequence lock checks + * is formed, giving integrity down the path walk. + */ +struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct hlist_head *head = d_hash(parent, hash); + struct hlist_node *node; + struct dentry *dentry; + + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + + /* + * The hash list is protected using RCU. + * + * Carefully use d_seq when comparing a candidate dentry, to avoid + * races with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ + hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + struct inode *i; + const char *tname; + int tlen; + + if (dentry->d_name.hash != hash) + continue; + +seqretry: + *seq = read_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + i = dentry->d_inode; + /* + * This seqcount check is required to ensure name and + * len are loaded atomically, so as not to walk off the + * edge of memory when walking. If we could load this + * atomically some other way, we could drop this check. + */ + if (read_seqcount_retry(&dentry->d_seq, *seq)) + goto seqretry; + if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_op->d_compare(parent, *inode, + dentry, i, + tlen, tname, name)) + continue; + } else { + if (tlen != len) + continue; + if (memcmp(tname, str, tlen)) + continue; + } + /* + * No extra seqcount check is required after the name + * compare. The caller must perform a seqcount check in + * order to do anything useful with the returned dentry + * anyway. + */ + *inode = i; + return dentry; + } + return NULL; +} + /** * d_lookup - search for a dentry * @parent: parent dentry @@ -1621,9 +1766,9 @@ EXPORT_SYMBOL(d_add_ci); * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *d_lookup(struct dentry *parent, struct qstr *name) { - struct dentry * dentry = NULL; + struct dentry *dentry; unsigned seq; do { @@ -1636,7 +1781,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); -/* +/** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find @@ -1651,16 +1796,23 @@ EXPORT_SYMBOL(d_lookup); * * __d_lookup callers must be commented. */ -struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; struct hlist_head *head = d_hash(parent,hash); - struct dentry *found = NULL; struct hlist_node *node; + struct dentry *found = NULL; struct dentry *dentry; + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + /* * The hash list is protected using RCU. * @@ -1677,24 +1829,15 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { - struct qstr *qstr; + const char *tname; + int tlen; if (dentry->d_name.hash != hash) continue; - if (dentry->d_parent != parent) - continue; spin_lock(&dentry->d_lock); - - /* - * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because - * we're about to compare the whole name anyway. - */ if (dentry->d_parent != parent) goto next; - - /* non-existing due to RCU? */ if (d_unhashed(dentry)) goto next; @@ -1702,16 +1845,17 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). */ - qstr = &dentry->d_name; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; if (parent->d_op && parent->d_op->d_compare) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, - qstr->len, qstr->name, name)) + tlen, tname, name)) goto next; } else { - if (qstr->len != len) + if (tlen != len) goto next; - if (memcmp(qstr->name, str, len)) + if (memcmp(tname, str, tlen)) goto next; } @@ -1821,7 +1965,7 @@ again: goto again; } dentry->d_flags &= ~DCACHE_CANT_MOUNT; - dentry_iput(dentry); + dentry_unlink_inode(dentry); fsnotify_nameremove(dentry, isdir); return; } @@ -1884,7 +2028,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ spin_lock(&dentry->d_lock); + write_seqcount_begin(&dentry->d_seq); memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(dentry_update_name_case); @@ -1997,6 +2143,9 @@ void d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&target->d_seq); + /* Move the dentry to the target hash queue, if on different bucket */ spin_lock(&dcache_hash_lock); if (!d_unhashed(dentry)) @@ -2005,6 +2154,7 @@ void d_move(struct dentry * dentry, struct dentry * target) spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ __d_drop(target); list_del(&dentry->d_u.d_child); @@ -2028,6 +2178,9 @@ void d_move(struct dentry * dentry, struct dentry * target) list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); @@ -2110,6 +2263,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&anon->d_seq); + dparent = dentry->d_parent; aparent = anon->d_parent; @@ -2130,6 +2286,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + write_seqcount_end(&dentry->d_seq); + write_seqcount_end(&anon->d_seq); + dentry_unlock_parents_for_move(anon, dentry); spin_unlock(&dentry->d_lock); diff --git a/fs/filesystems.c b/fs/filesystems.c index 68ba492d8eef..751d6b255a12 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs) tmp = &(*tmp)->next; } write_unlock(&file_systems_lock); + + synchronize_rcu(); + return -EINVAL; } diff --git a/fs/namei.c b/fs/namei.c index 5642bc2be418..8d3f15b3a541 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname); /* * This does basic POSIX ACL permission checking */ -static int acl_permission_check(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) +static inline int __acl_permission_check(struct inode *inode, int mask, + int (*check_acl)(struct inode *inode, int mask), int rcu) { umode_t mode = inode->i_mode; @@ -180,9 +180,13 @@ static int acl_permission_check(struct inode *inode, int mask, mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { - int error = check_acl(inode, mask); - if (error != -EAGAIN) - return error; + if (rcu) { + return -ECHILD; + } else { + int error = check_acl(inode, mask); + if (error != -EAGAIN) + return error; + } } if (in_group_p(inode->i_gid)) @@ -197,6 +201,12 @@ static int acl_permission_check(struct inode *inode, int mask, return -EACCES; } +static inline int acl_permission_check(struct inode *inode, int mask, + int (*check_acl)(struct inode *inode, int mask)) +{ + return __acl_permission_check(inode, mask, check_acl, 0); +} + /** * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for @@ -374,6 +384,173 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); +/** + * nameidata_drop_rcu - drop this nameidata out of rcu-walk + * @nd: nameidata pathwalk data to drop + * @Returns: 0 on success, -ECHLID on failure + * + * Path walking has 2 modes, rcu-walk and ref-walk (see + * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt + * to drop out of rcu-walk mode and take normal reference counts on dentries + * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take + * refcounts at the last known good point before rcu-walk got stuck, so + * ref-walk may continue from there. If this is not successful (eg. a seqcount + * has changed), then failure is returned and path walk restarts from the + * beginning in ref-walk mode. + * + * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into + * ref-walk. Must be called from rcu-walk context. + */ +static int nameidata_drop_rcu(struct nameidata *nd) +{ + struct fs_struct *fs = current->fs; + struct dentry *dentry = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + if (nd->root.mnt) { + spin_lock(&fs->lock); + if (nd->root.mnt != fs->root.mnt || + nd->root.dentry != fs->root.dentry) + goto err_root; + } + spin_lock(&dentry->d_lock); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err; + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + if (nd->root.mnt) { + path_get(&nd->root); + spin_unlock(&fs->lock); + } + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + nd->flags &= ~LOOKUP_RCU; + return 0; +err: + spin_unlock(&dentry->d_lock); +err_root: + if (nd->root.mnt) + spin_unlock(&fs->lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) + return nameidata_drop_rcu(nd); + return 0; +} + +/** + * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk + * @nd: nameidata pathwalk data to drop + * @dentry: dentry to drop + * @Returns: 0 on success, -ECHLID on failure + * + * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, + * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + */ +static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) +{ + struct fs_struct *fs = current->fs; + struct dentry *parent = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + if (nd->root.mnt) { + spin_lock(&fs->lock); + if (nd->root.mnt != fs->root.mnt || + nd->root.dentry != fs->root.dentry) + goto err_root; + } + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err; + /* + * If the sequence check on the child dentry passed, then the child has + * not been removed from its parent. This means the parent dentry must + * be valid and able to take a reference at this point. + */ + BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); + BUG_ON(!parent->d_count); + parent->d_count++; + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + if (nd->root.mnt) { + path_get(&nd->root); + spin_unlock(&fs->lock); + } + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + nd->flags &= ~LOOKUP_RCU; + return 0; +err: + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); +err_root: + if (nd->root.mnt) + spin_unlock(&fs->lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) +{ + if (nd->flags & LOOKUP_RCU) + return nameidata_dentry_drop_rcu(nd, dentry); + return 0; +} + +/** + * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk + * @nd: nameidata pathwalk data to drop + * @Returns: 0 on success, -ECHLID on failure + * + * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. + * nd->path should be the final element of the lookup, so nd->root is discarded. + * Must be called from rcu-walk context. + */ +static int nameidata_drop_rcu_last(struct nameidata *nd) +{ + struct dentry *dentry = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + spin_lock(&dentry->d_lock); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err_unlock; + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + + return 0; + +err_unlock: + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) +{ + if (likely(nd->flags & LOOKUP_RCU)) + return nameidata_drop_rcu_last(nd); + return 0; +} + /** * release_open_intent - free up open intent resources * @nd: pointer to nameidata @@ -459,26 +636,40 @@ force_reval_path(struct path *path, struct nameidata *nd) * short-cut DAC fails, then call ->permission() to do more * complete permission check. */ -static int exec_permission(struct inode *inode) +static inline int __exec_permission(struct inode *inode, int rcu) { int ret; if (inode->i_op->permission) { + if (rcu) + return -ECHILD; ret = inode->i_op->permission(inode, MAY_EXEC); if (!ret) goto ok; return ret; } - ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); + ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu); if (!ret) goto ok; + if (rcu && ret == -ECHILD) + return ret; if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) goto ok; return ret; ok: - return security_inode_permission(inode, MAY_EXEC); + return security_inode_exec_permission(inode, rcu); +} + +static int exec_permission(struct inode *inode) +{ + return __exec_permission(inode, 0); +} + +static int exec_permission_rcu(struct inode *inode) +{ + return __exec_permission(inode, 1); } static __always_inline void set_root(struct nameidata *nd) @@ -489,8 +680,20 @@ static __always_inline void set_root(struct nameidata *nd) static int link_path_walk(const char *, struct nameidata *); +static __always_inline void set_root_rcu(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + spin_lock(&fs->lock); + nd->root = fs->root; + spin_unlock(&fs->lock); + } +} + static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { + int ret; + if (IS_ERR(link)) goto fail; @@ -500,8 +703,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l nd->path = nd->root; path_get(&nd->root); } + nd->inode = nd->path.dentry->d_inode; - return link_path_walk(link, nd); + ret = link_path_walk(link, nd); + return ret; fail: path_put(&nd->path); return PTR_ERR(link); @@ -516,11 +721,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { - dput(nd->path.dentry); - if (nd->path.mnt != path->mnt) { - mntput(nd->path.mnt); - nd->path.mnt = path->mnt; + if (!(nd->flags & LOOKUP_RCU)) { + dput(nd->path.dentry); + if (nd->path.mnt != path->mnt) + mntput(nd->path.mnt); } + nd->path.mnt = path->mnt; nd->path.dentry = path->dentry; } @@ -535,9 +741,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p) if (path->mnt != nd->path.mnt) { path_to_nameidata(path, nd); + nd->inode = nd->path.dentry->d_inode; dget(dentry); } mntget(path->mnt); + nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); @@ -591,6 +799,20 @@ loop: return err; } +static int follow_up_rcu(struct path *path) +{ + struct vfsmount *parent; + struct dentry *mountpoint; + + parent = path->mnt->mnt_parent; + if (parent == path->mnt) + return 0; + mountpoint = path->mnt->mnt_mountpoint; + path->dentry = mountpoint; + path->mnt = parent; + return 1; +} + int follow_up(struct path *path) { struct vfsmount *parent; @@ -615,6 +837,21 @@ int follow_up(struct path *path) /* * serialization is taken care of in namespace.c */ +static void __follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + return; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + *inode = path->dentry->d_inode; + } +} + static int __follow_mount(struct path *path) { int res = 0; @@ -660,7 +897,42 @@ int follow_down(struct path *path) return 0; } -static __always_inline void follow_dotdot(struct nameidata *nd) +static int follow_dotdot_rcu(struct nameidata *nd) +{ + struct inode *inode = nd->inode; + + set_root_rcu(nd); + + while(1) { + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { + break; + } + if (nd->path.dentry != nd->path.mnt->mnt_root) { + struct dentry *old = nd->path.dentry; + struct dentry *parent = old->d_parent; + unsigned seq; + + seq = read_seqcount_begin(&parent->d_seq); + if (read_seqcount_retry(&old->d_seq, nd->seq)) + return -ECHILD; + inode = parent->d_inode; + nd->path.dentry = parent; + nd->seq = seq; + break; + } + if (!follow_up_rcu(&nd->path)) + break; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + inode = nd->path.dentry->d_inode; + } + __follow_mount_rcu(nd, &nd->path, &inode); + nd->inode = inode; + + return 0; +} + +static void follow_dotdot(struct nameidata *nd) { set_root(nd); @@ -681,6 +953,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) break; } follow_mount(&nd->path); + nd->inode = nd->path.dentry->d_inode; } /* @@ -718,18 +991,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, * It _is_ time-critical. */ static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path) + struct path *path, struct inode **inode) { struct vfsmount *mnt = nd->path.mnt; - struct dentry *dentry, *parent; + struct dentry *dentry, *parent = nd->path.dentry; struct inode *dir; /* * See if the low-level filesystem might want * to use its own hash.. */ - if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { - int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, - nd->path.dentry->d_inode, name); + if (parent->d_op && parent->d_op->d_hash) { + int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; } @@ -739,21 +1011,48 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * of a false negative due to a concurrent rename, we're going to * do the non-racy lookup, below. */ - dentry = __d_lookup(nd->path.dentry, name); - if (!dentry) - goto need_lookup; + if (nd->flags & LOOKUP_RCU) { + unsigned seq; + + *inode = nd->inode; + dentry = __d_lookup_rcu(parent, name, &seq, inode); + if (!dentry) { + if (nameidata_drop_rcu(nd)) + return -ECHILD; + goto need_lookup; + } + /* Memory barrier in read_seqcount_begin of child is enough */ + if (__read_seqcount_retry(&parent->d_seq, nd->seq)) + return -ECHILD; + + nd->seq = seq; + if (dentry->d_op && dentry->d_op->d_revalidate) { + /* We commonly drop rcu-walk here */ + if (nameidata_dentry_drop_rcu(nd, dentry)) + return -ECHILD; + goto need_revalidate; + } + path->mnt = mnt; + path->dentry = dentry; + __follow_mount_rcu(nd, path, inode); + } else { + dentry = __d_lookup(parent, name); + if (!dentry) + goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) - goto need_revalidate; + if (dentry->d_op && dentry->d_op->d_revalidate) + goto need_revalidate; done: - path->mnt = mnt; - path->dentry = dentry; - __follow_mount(path); + path->mnt = mnt; + path->dentry = dentry; + __follow_mount(path); + *inode = path->dentry->d_inode; + } return 0; need_lookup: - parent = nd->path.dentry; dir = parent->d_inode; + BUG_ON(nd->inode != dir); mutex_lock(&dir->i_mutex); /* @@ -815,7 +1114,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) static int link_path_walk(const char *name, struct nameidata *nd) { struct path next; - struct inode *inode; int err; unsigned int lookup_flags = nd->flags; @@ -824,18 +1122,28 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (!*name) goto return_reval; - inode = nd->path.dentry->d_inode; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); /* At this point we know we have a real path component. */ for(;;) { + struct inode *inode; unsigned long hash; struct qstr this; unsigned int c; nd->flags |= LOOKUP_CONTINUE; - err = exec_permission(inode); + if (nd->flags & LOOKUP_RCU) { + err = exec_permission_rcu(nd->inode); + if (err == -ECHILD) { + if (nameidata_drop_rcu(nd)) + return -ECHILD; + goto exec_again; + } + } else { +exec_again: + err = exec_permission(nd->inode); + } if (err) break; @@ -866,37 +1174,44 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; - follow_dotdot(nd); - inode = nd->path.dentry->d_inode; + if (nd->flags & LOOKUP_RCU) { + if (follow_dotdot_rcu(nd)) + return -ECHILD; + } else + follow_dotdot(nd); /* fallthrough */ case 1: continue; } /* This does the actual lookups.. */ - err = do_lookup(nd, &this, &next); + err = do_lookup(nd, &this, &next, &inode); if (err) break; - err = -ENOENT; - inode = next.dentry->d_inode; if (!inode) goto out_dput; if (inode->i_op->follow_link) { + /* We commonly drop rcu-walk here */ + if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) + return -ECHILD; + BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) goto return_err; + nd->inode = nd->path.dentry->d_inode; err = -ENOENT; - inode = nd->path.dentry->d_inode; - if (!inode) + if (!nd->inode) break; - } else + } else { path_to_nameidata(&next, nd); + nd->inode = inode; + } err = -ENOTDIR; - if (!inode->i_op->lookup) + if (!nd->inode->i_op->lookup) break; continue; /* here ends the main loop */ @@ -911,32 +1226,39 @@ last_component: if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; - follow_dotdot(nd); - inode = nd->path.dentry->d_inode; + if (nd->flags & LOOKUP_RCU) { + if (follow_dotdot_rcu(nd)) + return -ECHILD; + } else + follow_dotdot(nd); /* fallthrough */ case 1: goto return_reval; } - err = do_lookup(nd, &this, &next); + err = do_lookup(nd, &this, &next, &inode); if (err) break; - inode = next.dentry->d_inode; if (follow_on_final(inode, lookup_flags)) { + if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) + return -ECHILD; + BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) goto return_err; - inode = nd->path.dentry->d_inode; - } else + nd->inode = nd->path.dentry->d_inode; + } else { path_to_nameidata(&next, nd); + nd->inode = inode; + } err = -ENOENT; - if (!inode) + if (!nd->inode) break; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op->lookup) + if (!nd->inode->i_op->lookup) break; } goto return_base; @@ -958,6 +1280,8 @@ return_reval: */ if (nd->path.dentry && nd->path.dentry->d_sb && (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (nameidata_drop_rcu_maybe(nd)) + return -ECHILD; err = -ESTALE; /* Note: we do not d_invalidate() */ if (!nd->path.dentry->d_op->d_revalidate( @@ -965,16 +1289,34 @@ return_reval: break; } return_base: + if (nameidata_drop_rcu_last_maybe(nd)) + return -ECHILD; return 0; out_dput: - path_put_conditional(&next, nd); + if (!(nd->flags & LOOKUP_RCU)) + path_put_conditional(&next, nd); break; } - path_put(&nd->path); + if (!(nd->flags & LOOKUP_RCU)) + path_put(&nd->path); return_err: return err; } +static inline int path_walk_rcu(const char *name, struct nameidata *nd) +{ + current->total_link_count = 0; + + return link_path_walk(name, nd); +} + +static inline int path_walk_simple(const char *name, struct nameidata *nd) +{ + current->total_link_count = 0; + + return link_path_walk(name, nd); +} + static int path_walk(const char *name, struct nameidata *nd) { struct path save = nd->path; @@ -1000,6 +1342,88 @@ static int path_walk(const char *name, struct nameidata *nd) return result; } +static void path_finish_rcu(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) { + /* RCU dangling. Cancel it. */ + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } + if (nd->file) + fput(nd->file); +} + +static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) +{ + int retval = 0; + int fput_needed; + struct file *file; + + nd->last_type = LAST_ROOT; /* if there are only slashes... */ + nd->flags = flags | LOOKUP_RCU; + nd->depth = 0; + nd->root.mnt = NULL; + nd->file = NULL; + + if (*name=='/') { + struct fs_struct *fs = current->fs; + + br_read_lock(vfsmount_lock); + rcu_read_lock(); + + spin_lock(&fs->lock); + nd->root = fs->root; + nd->path = nd->root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + spin_unlock(&fs->lock); + + } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; + + br_read_lock(vfsmount_lock); + rcu_read_lock(); + + spin_lock(&fs->lock); + nd->path = fs->pwd; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + spin_unlock(&fs->lock); + } else { + struct dentry *dentry; + + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) + goto out_fail; + + dentry = file->f_path.dentry; + + retval = -ENOTDIR; + if (!S_ISDIR(dentry->d_inode->i_mode)) + goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto fput_fail; + + nd->path = file->f_path; + if (fput_needed) + nd->file = file; + + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + br_read_lock(vfsmount_lock); + rcu_read_lock(); + } + nd->inode = nd->path.dentry->d_inode; + return 0; + +fput_fail: + fput_light(file, fput_needed); +out_fail: + return retval; +} + static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1040,6 +1464,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei fput_light(file, fput_needed); } + nd->inode = nd->path.dentry->d_inode; return 0; fput_fail: @@ -1052,16 +1477,53 @@ out_fail: static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { - int retval = path_init(dfd, name, flags, nd); - if (!retval) - retval = path_walk(name, nd); - if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && - nd->path.dentry->d_inode)) - audit_inode(name, nd->path.dentry); + int retval; + + /* + * Path walking is largely split up into 2 different synchronisation + * schemes, rcu-walk and ref-walk (explained in + * Documentation/filesystems/path-lookup.txt). These share much of the + * path walk code, but some things particularly setup, cleanup, and + * following mounts are sufficiently divergent that functions are + * duplicated. Typically there is a function foo(), and its RCU + * analogue, foo_rcu(). + * + * -ECHILD is the error number of choice (just to avoid clashes) that + * is returned if some aspect of an rcu-walk fails. Such an error must + * be handled by restarting a traditional ref-walk (which will always + * be able to complete). + */ + retval = path_init_rcu(dfd, name, flags, nd); + if (unlikely(retval)) + return retval; + retval = path_walk_rcu(name, nd); + path_finish_rcu(nd); if (nd->root.mnt) { path_put(&nd->root); nd->root.mnt = NULL; } + + if (unlikely(retval == -ECHILD || retval == -ESTALE)) { + /* slower, locked walk */ + if (retval == -ESTALE) + flags |= LOOKUP_REVAL; + retval = path_init(dfd, name, flags, nd); + if (unlikely(retval)) + return retval; + retval = path_walk(name, nd); + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } + } + + if (likely(!retval)) { + if (unlikely(!audit_dummy_context())) { + if (nd->path.dentry && nd->inode) + audit_inode(name, nd->path.dentry); + } + } + return retval; } @@ -1104,10 +1566,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, path_get(&nd->path); nd->root = nd->path; path_get(&nd->root); + nd->inode = nd->path.dentry->d_inode; retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && - nd->path.dentry->d_inode)) + nd->inode)) audit_inode(name, nd->path.dentry); path_put(&nd->root); @@ -1488,6 +1951,7 @@ out_unlock: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->path.dentry); nd->path.dentry = path->dentry; + if (error) return error; /* Don't check for write permission, don't truncate */ @@ -1582,6 +2046,9 @@ exit: return ERR_PTR(error); } +/* + * Handle O_CREAT case for do_filp_open + */ static struct file *do_last(struct nameidata *nd, struct path *path, int open_flag, int acc_mode, int mode, const char *pathname) @@ -1603,42 +2070,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } /* fallthrough */ case LAST_ROOT: - if (open_flag & O_CREAT) - goto exit; - /* fallthrough */ + goto exit; case LAST_BIND: audit_inode(pathname, dir); goto ok; } /* trailing slashes? */ - if (nd->last.name[nd->last.len]) { - if (open_flag & O_CREAT) - goto exit; - nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; - } - - /* just plain open? */ - if (!(open_flag & O_CREAT)) { - error = do_lookup(nd, &nd->last, path); - if (error) - goto exit; - error = -ENOENT; - if (!path->dentry->d_inode) - goto exit_dput; - if (path->dentry->d_inode->i_op->follow_link) - return NULL; - error = -ENOTDIR; - if (nd->flags & LOOKUP_DIRECTORY) { - if (!path->dentry->d_inode->i_op->lookup) - goto exit_dput; - } - path_to_nameidata(path, nd); - audit_inode(pathname, nd->path.dentry); - goto ok; - } + if (nd->last.name[nd->last.len]) + goto exit; - /* OK, it's O_CREAT */ mutex_lock(&dir->d_inode->i_mutex); path->dentry = lookup_hash(nd); @@ -1709,8 +2150,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, return NULL; path_to_nameidata(path, nd); + nd->inode = path->dentry->d_inode; error = -EISDIR; - if (S_ISDIR(path->dentry->d_inode->i_mode)) + if (S_ISDIR(nd->inode->i_mode)) goto exit; ok: filp = finish_open(nd, open_flag, acc_mode); @@ -1741,7 +2183,7 @@ struct file *do_filp_open(int dfd, const char *pathname, struct path path; int count = 0; int flag = open_to_namei_flags(open_flag); - int force_reval = 0; + int flags; if (!(open_flag & O_CREAT)) mode = 0; @@ -1770,54 +2212,84 @@ struct file *do_filp_open(int dfd, const char *pathname, if (open_flag & O_APPEND) acc_mode |= MAY_APPEND; - /* find the parent */ -reval: - error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); + flags = LOOKUP_OPEN; + if (open_flag & O_CREAT) { + flags |= LOOKUP_CREATE; + if (open_flag & O_EXCL) + flags |= LOOKUP_EXCL; + } + if (open_flag & O_DIRECTORY) + flags |= LOOKUP_DIRECTORY; + if (!(open_flag & O_NOFOLLOW)) + flags |= LOOKUP_FOLLOW; + + filp = get_empty_filp(); + if (!filp) + return ERR_PTR(-ENFILE); + + filp->f_flags = open_flag; + nd.intent.open.file = filp; + nd.intent.open.flags = flag; + nd.intent.open.create_mode = mode; + + if (open_flag & O_CREAT) + goto creat; + + /* !O_CREAT, simple open */ + error = do_path_lookup(dfd, pathname, flags, &nd); + if (unlikely(error)) + goto out_filp; + error = -ELOOP; + if (!(nd.flags & LOOKUP_FOLLOW)) { + if (nd.inode->i_op->follow_link) + goto out_path; + } + error = -ENOTDIR; + if (nd.flags & LOOKUP_DIRECTORY) { + if (!nd.inode->i_op->lookup) + goto out_path; + } + audit_inode(pathname, nd.path.dentry); + filp = finish_open(&nd, open_flag, acc_mode); + return filp; + +creat: + /* OK, have to create the file. Find the parent. */ + error = path_init_rcu(dfd, pathname, + LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); if (error) - return ERR_PTR(error); - if (force_reval) - nd.flags |= LOOKUP_REVAL; + goto out_filp; + error = path_walk_rcu(pathname, &nd); + path_finish_rcu(&nd); + if (unlikely(error == -ECHILD || error == -ESTALE)) { + /* slower, locked walk */ + if (error == -ESTALE) { +reval: + flags |= LOOKUP_REVAL; + } + error = path_init(dfd, pathname, + LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); + if (error) + goto out_filp; - current->total_link_count = 0; - error = link_path_walk(pathname, &nd); - if (error) { - filp = ERR_PTR(error); - goto out; + error = path_walk_simple(pathname, &nd); } - if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) + if (unlikely(error)) + goto out_filp; + if (unlikely(!audit_dummy_context())) audit_inode(pathname, nd.path.dentry); /* * We have the parent and last component. */ - - error = -ENFILE; - filp = get_empty_filp(); - if (filp == NULL) - goto exit_parent; - nd.intent.open.file = filp; - filp->f_flags = open_flag; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = mode; - nd.flags &= ~LOOKUP_PARENT; - nd.flags |= LOOKUP_OPEN; - if (open_flag & O_CREAT) { - nd.flags |= LOOKUP_CREATE; - if (open_flag & O_EXCL) - nd.flags |= LOOKUP_EXCL; - } - if (open_flag & O_DIRECTORY) - nd.flags |= LOOKUP_DIRECTORY; - if (!(open_flag & O_NOFOLLOW)) - nd.flags |= LOOKUP_FOLLOW; + nd.flags = flags; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; - struct inode *inode = path.dentry->d_inode; void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; @@ -1838,36 +2310,33 @@ reval: goto exit_dput; error = __do_follow_link(&path, &nd, &cookie); if (unlikely(error)) { + if (!IS_ERR(cookie) && nd.inode->i_op->put_link) + nd.inode->i_op->put_link(path.dentry, &nd, cookie); /* nd.path had been dropped */ - if (!IS_ERR(cookie) && inode->i_op->put_link) - inode->i_op->put_link(path.dentry, &nd, cookie); - path_put(&path); - release_open_intent(&nd); - filp = ERR_PTR(error); - goto out; + nd.path = path; + goto out_path; } holder = path; nd.flags &= ~LOOKUP_PARENT; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); - if (inode->i_op->put_link) - inode->i_op->put_link(holder.dentry, &nd, cookie); + if (nd.inode->i_op->put_link) + nd.inode->i_op->put_link(holder.dentry, &nd, cookie); path_put(&holder); } out: if (nd.root.mnt) path_put(&nd.root); - if (filp == ERR_PTR(-ESTALE) && !force_reval) { - force_reval = 1; + if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) goto reval; - } return filp; exit_dput: path_put_conditional(&path, &nd); +out_path: + path_put(&nd.path); +out_filp: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); -exit_parent: - path_put(&nd.path); filp = ERR_PTR(error); goto out; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ae4b0fd9033f..998e3a715bcc 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -402,6 +402,10 @@ static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, const struct inode *inode, unsigned int len, const char *str, const struct qstr *name) { + /* Although proc doesn't have negative dentries, rcu-walk means + * that inode here can be NULL */ + if (!inode) + return 0; if (name->len != len) return 1; if (memcmp(name->name, str, len)) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ca648685f0cc..c2e7390289cc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,7 @@ struct dentry { unsigned int d_count; /* protected by d_lock */ unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ + seqcount_t d_seq; /* per dentry seqlock */ int d_mounted; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ @@ -266,9 +268,33 @@ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry * d_lookup(struct dentry *, struct qstr *); -extern struct dentry * __d_lookup(struct dentry *, struct qstr *); -extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode); + +/** + * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok + * @dentry: dentry to take a ref on + * @seq: seqcount to verify against + * @Returns: 0 on failure, else 1. + * + * __d_rcu_to_refcount operates on a dentry,seq pair that was returned + * by __d_lookup_rcu, to get a reference on an rcu-walk dentry. + */ +static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) +{ + int ret = 0; + + assert_spin_locked(&dentry->d_lock); + if (!read_seqcount_retry(&dentry->d_seq, seq)) { + ret = 1; + dentry->d_count++; + } + + return ret; +} /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); diff --git a/include/linux/namei.h b/include/linux/namei.h index aec730b53935..18d06add0a40 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -19,7 +19,10 @@ struct nameidata { struct path path; struct qstr last; struct path root; + struct file *file; + struct inode *inode; /* path.dentry.d_inode */ unsigned int flags; + unsigned seq; int last_type; unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; @@ -43,11 +46,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - internal "there are more path components" flag * - dentry cache is untrusted; force a real lookup */ -#define LOOKUP_FOLLOW 1 -#define LOOKUP_DIRECTORY 2 -#define LOOKUP_CONTINUE 4 -#define LOOKUP_PARENT 16 -#define LOOKUP_REVAL 64 +#define LOOKUP_FOLLOW 0x0001 +#define LOOKUP_DIRECTORY 0x0002 +#define LOOKUP_CONTINUE 0x0004 + +#define LOOKUP_PARENT 0x0010 +#define LOOKUP_REVAL 0x0020 +#define LOOKUP_RCU 0x0040 /* * Intent data */ diff --git a/include/linux/security.h b/include/linux/security.h index fd4d55fb8845..ed95401970c7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * called when the actual read/write operations are performed. * @inode contains the inode structure to check. * @mask contains the permission mask. - * @nd contains the nameidata (may be NULL). * Return 0 if permission is granted. * @inode_setattr: * Check permission before setting file attributes. Note that the kernel @@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); +int security_inode_exec_permission(struct inode *inode, unsigned int flags); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); int security_inode_setxattr(struct dentry *dentry, const char *name, @@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask) return 0; } +static inline int security_inode_exec_permission(struct inode *inode, + unsigned int flags) +{ + return 0; +} + static inline int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { diff --git a/security/security.c b/security/security.c index 1b798d3df710..c645e263ca8d 100644 --- a/security/security.c +++ b/security/security.c @@ -513,6 +513,15 @@ int security_inode_permission(struct inode *inode, int mask) return security_ops->inode_permission(inode, mask); } +int security_inode_exec_permission(struct inode *inode, unsigned int flags) +{ + if (unlikely(IS_PRIVATE(inode))) + return 0; + if (flags) + return -ECHILD; + return security_ops->inode_permission(inode, MAY_EXEC); +} + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { if (unlikely(IS_PRIVATE(dentry->d_inode))) -- cgit v1.2.3 From 5f57cbcc02cf18f6b22ef4066bb10afeb8f930ff Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:54 +1100 Subject: fs: dcache remove d_mounted Rather than keep a d_mounted count in the dentry, set a dentry flag instead. The flag can be cleared by checking the hash table to see if there are any mounts left, which is not time critical because it is performed at detach time. The mounted state of a dentry is only used to speculatively take a look in the mount hash table if it is set -- before following the mount, vfsmount lock is taken and mount re-checked without races. This saves 4 bytes on 32-bit, nothing on 64-bit but it does provide a hole I might use later (and some configs have larger than 32-bit spinlocks which might make use of the hole). Autofs4 conversion and changelog by Ian Kent : In autofs4, when expring direct (or offset) mounts we need to ensure that we block user path walks into the autofs mount, which is covered by another mount. To do this we clear the mounted status so that follows stop before walking into the mount and are essentially blocked until the expire is completed. The automount daemon still finds the correct dentry for the umount due to the follow mount logic in fs/autofs4/root.c:autofs4_follow_link(), which is set as an inode operation for direct and offset mounts only and is called following the lookup that stopped at the covered mount. At the end of the expire the covering mount probably has gone away so the mounted status need not be restored. But we need to check this and only restore the mounted status if the expire failed. XXX: autofs may not work right if we have other mounts go over the top of it? Signed-off-by: Nick Piggin --- fs/autofs4/expire.c | 13 +++++++++++-- fs/dcache.c | 1 - fs/namespace.c | 29 ++++++++++++++++++++++++++--- include/linux/dcache.h | 42 +++++++++++++++++++++--------------------- 4 files changed, 58 insertions(+), 27 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 2f7951d67d16..cc1d01365905 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -295,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, struct autofs_info *ino = autofs4_dentry_ino(root); if (d_mountpoint(root)) { ino->flags |= AUTOFS_INF_MOUNTPOINT; - root->d_mounted--; + spin_lock(&root->d_lock); + root->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&root->d_lock); } ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); @@ -503,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - sb->s_root->d_mounted++; + spin_lock(&sb->s_root->d_lock); + /* + * If we haven't been expired away, then reset + * mounted status. + */ + if (mnt->mnt_parent != mnt) + sb->s_root->d_flags |= DCACHE_MOUNTED; + spin_unlock(&sb->s_root->d_lock); ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; diff --git a/fs/dcache.c b/fs/dcache.c index 187fea040108..1d5cf511e1c7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1265,7 +1265,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - dentry->d_mounted = 0; INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); diff --git a/fs/namespace.c b/fs/namespace.c index 3dbfc072ec70..39a7d507fba0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -491,6 +491,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * Clear dentry's mounted state if it has no remaining mounts. + * vfsmount_lock must be held for write. + */ +static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) +{ + unsigned u; + + for (u = 0; u < HASH_SIZE; u++) { + struct vfsmount *p; + + list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { + if (p->mnt_mountpoint == dentry) + return; + } + } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); +} + /* * vfsmount lock must be held for write */ @@ -502,7 +523,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - old_path->dentry->d_mounted--; + dentry_reset_mounted(old_path->mnt, old_path->dentry); } /* @@ -513,7 +534,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, { child_mnt->mnt_parent = mntget(mnt); child_mnt->mnt_mountpoint = dget(dentry); - dentry->d_mounted++; + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); } /* @@ -1073,7 +1096,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; - p->mnt_mountpoint->d_mounted--; + dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); } change_mnt_propagation(p, MS_PRIVATE); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c2e7390289cc..e4414693065e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -92,7 +92,6 @@ struct dentry { unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ seqcount_t d_seq; /* per dentry seqlock */ - int d_mounted; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ /* @@ -156,33 +155,34 @@ struct dentry_operations { /* d_flags entries */ #define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly - * renamed" and has to be - * deleted on the last dput() - */ -#define DCACHE_DISCONNECTED 0x0004 - /* This dentry is possibly not currently connected to the dcache tree, - * in which case its parent will either be itself, or will have this - * flag as well. nfsd will not use a dentry with this bit set, but will - * first endeavour to clear the bit either by discovering that it is - * connected, or by performing lookup operations. Any filesystem which - * supports nfsd_operations MUST have a lookup function which, if it finds - * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move - * that dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. - */ +#define DCACHE_NFSFS_RENAMED 0x0002 + /* this dentry has been "silly renamed" and has to be deleted on the last + * dput() */ + +#define DCACHE_DISCONNECTED 0x0004 + /* This dentry is possibly not currently connected to the dcache tree, in + * which case its parent will either be itself, or will have this flag as + * well. nfsd will not use a dentry with this bit set, but will first + * endeavour to clear the bit either by discovering that it is connected, + * or by performing lookup operations. Any filesystem which supports + * nfsd_operations MUST have a lookup function which, if it finds a + * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that + * dentry into place and return that dentry rather than the passed one, + * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 - -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 + /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ - -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 + /* Parent inode is watched by some fsnotify listener */ #define DCACHE_CANT_MOUNT 0x0100 #define DCACHE_GENOCIDE 0x0200 +#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -372,7 +372,7 @@ extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) { - return dentry->d_mounted; + return dentry->d_flags & DCACHE_MOUNTED; } extern struct vfsmount *lookup_mnt(struct path *); -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- fs/9p/vfs_inode.c | 26 +++++++++++++------------- fs/adfs/dir.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/namei.c | 2 +- fs/affs/super.c | 2 +- fs/afs/dir.c | 2 +- fs/anon_inodes.c | 2 +- fs/autofs4/inode.c | 2 +- fs/autofs4/root.c | 10 +++++----- fs/btrfs/export.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 8 ++++---- fs/dcache.c | 30 ++++++++++++++++++++++++++---- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fat/namei_msdos.c | 6 +++--- fs/fat/namei_vfat.c | 8 ++++---- fs/fuse/dir.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/export.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dentry.c | 2 +- fs/isofs/inode.c | 2 +- fs/isofs/namei.c | 2 +- fs/jfs/namei.c | 4 ++-- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 31 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 ++-- fs/ncpfs/inode.c | 2 +- fs/nfs/dir.c | 6 +++--- fs/nfs/getroot.c | 4 ++-- fs/ocfs2/export.c | 4 ++-- fs/ocfs2/namei.c | 10 +++++----- fs/pipe.c | 2 +- fs/proc/base.c | 12 ++++++------ fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/sysv/super.c | 2 +- include/linux/dcache.h | 6 ++++++ kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 63 files changed, 174 insertions(+), 137 deletions(-) (limited to 'fs/dcache.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d39d8a53b579..5a24f40bb48e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - path.dentry->d_op = &pfmfs_dentry_operations; + d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd84..b09adb57971f 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4a..78f09412740c 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f6f9081e6d2c..df8bbb358d54 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a11e5e102716..bf7693c384f9 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 47dffc513a26..a4041b52fbca 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 5aca08c21100..944a4042fb65 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 4c18fcfb0a19..d39081bbe7ce 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -482,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c18cde27000..b8bb7e7148d0 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564c..aca8806fa206 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa706..a7bdb9dcac84 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 10ca68a96dc7..bfe3f2eb684d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 659f532d26a0..0ccf9a8afcdf 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9d2994a42a2..63e4546b478a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 58abc3da6111..cc01cf826769 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac00..e3b10ca6d453 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b7..2a239d878e85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b0..ec5b68e3b928 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9e37e8bc9b89..aa40c811f8d2 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e9acea440ffc..36637a8c1ed3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir, */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); return NULL; } @@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { - child->d_op = &configfs_dentry_ops; + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c7..f9693da3efbd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e5c7ec1fc98..f91b35db4c6e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e6..351038675376 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8cccfebee180..206351af7c58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b3e072d8982..35ffe43afa4b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4fc06278db48..3be5ed7d859f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..c9a8a426a395 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 44e0a6c57e87..a8b31da19b93 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb9..97012ecff560 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..2aeabd4218cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..f28f89796f4d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41b..ea4aefe7c652 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ef4ee5716abe..0bef62aa4f42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f59..ccab87145f7a 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 182e83a9079e..ddf712e4700e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 861113fcfc88..0bc81cf256b8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 35526df1fd32..32c13a94e1e9 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d8f3a652243d..844a7903c72f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -949,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 715f7d318046..679a849c3b27 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 57f90dad8919..a151cbdec626 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b715b0f7bdfd..3150d766e0d4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index 28b36663c44e..889311e3d06b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3accef..1b9e07728a9f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index c731b50a6184..90bd2873e117 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; status = dentry->d_op->d_revalidate(dentry, nd); @@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (parent->d_op && parent->d_op->d_hash) { + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; @@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_op && dentry->d_op->d_revalidate) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { /* We commonly drop rcu-walk here */ if (nameidata_dentry_drop_rcu(nd, dentry)) return -ECHILD; @@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (!dentry) goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; done: path->mnt = mnt; @@ -1281,8 +1291,7 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (need_reval_dot(nd->path.dentry)) { if (nameidata_drop_rcu_maybe(nd)) return -ECHILD; err = -ESTALE; @@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { + if (base->d_flags & DCACHE_OP_HASH) { err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) @@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (need_reval_dot(dir)) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; goto exit; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0ba3cdc95a44..4b9cbb28d7fa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 60047dbeb38d..0c75a5f3cafd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb77471b8823..37e0a8bb077e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b3e36c3430de..c3a5a1126833 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af3..6adafa576065 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ff5744e1e36f..d14cad6e2e41 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/pipe.c b/fs/pipe.c index ae3592dcc88c..01a786567810 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; diff --git a/fs/proc/base.c b/fs/proc/base.c index d932fdb6a245..85f0a80912aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1d607be36d95..f766be29d2c7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 998e3a715bcc..35efd85a4d32 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7a..e0f0d7ea10a1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 27e1102e303e..3e076caa8daf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7507aeb4c90e..b5e68da2db32 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c10..76712aefc4ab 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4414693065e..f4b40a751f09 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,6 +183,11 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470c3949..51cddc11cd85 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/net/socket.c b/net/socket.c index 817dc92e9ef8..991e266bc7ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2899fe27f880..09f01f41e55a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } -- cgit v1.2.3 From 44a7d7a878c9cbb74f236ea755b25b6b2e26a9a9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:56 +1100 Subject: fs: cache optimise dentry and inode for rcu-walk Put dentry and inode fields into top of data structure. This allows RCU path traversal to perform an RCU dentry lookup in a path walk by touching only the first 56 bytes of the dentry. We also fit in 8 bytes of inline name in the first 64 bytes, so for short names, only 64 bytes needs to be touched to perform the lookup. We should get rid of the hash->prev pointer from the first 64 bytes, and fit 16 bytes of name in there, which will take care of 81% rather than 32% of the kernel tree. inode is also rearranged so that RCU lookup will only touch a single cacheline in the inode, plus one in the i_ops structure. This is important for directory component lookups in RCU path walking. In the kernel source, directory names average is around 6 chars, so this works. When we reach the last element of the lookup, we need to lock it and take its refcount which requires another cacheline access. Align dentry and inode operations structs, so members will be at predictable offsets and we can group common operations into head of structure. Signed-off-by: Nick Piggin --- fs/dcache.c | 2 -- include/linux/dcache.h | 36 +++++++++++++++++++----------------- include/linux/fs.h | 42 +++++++++++++++++++++++------------------- 3 files changed, 42 insertions(+), 38 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index f9693da3efbd..07d1f6862dc7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -83,8 +83,6 @@ EXPORT_SYMBOL(dcache_inode_lock); static struct kmem_cache *dentry_cache __read_mostly; -#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f4b40a751f09..b1aeda077258 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -82,25 +82,33 @@ full_name_hash(const unsigned char *name, unsigned int len) * large memory footprint increase). */ #ifdef CONFIG_64BIT -#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ +# define DNAME_INLINE_LEN 32 /* 192 bytes */ #else -#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ +# ifdef CONFIG_SMP +# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 40 /* 128 bytes */ +# endif #endif struct dentry { - unsigned int d_count; /* protected by d_lock */ + /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ seqcount_t d_seq; /* per dentry seqlock */ - struct inode *d_inode; /* Where the name belongs to - NULL is - * negative */ - /* - * The next three fields are touched by __d_lookup. Place them here - * so they all fit in a cache line. - */ struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; + struct inode *d_inode; /* Where the name belongs to - NULL is + * negative */ + unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + + /* Ref lookup also touches following */ + unsigned int d_count; /* protected by d_lock */ + spinlock_t d_lock; /* per dentry lock */ + const struct dentry_operations *d_op; + struct super_block *d_sb; /* The root of the dentry tree */ + unsigned long d_time; /* used by d_revalidate */ + void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ /* @@ -112,12 +120,6 @@ struct dentry { } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ - unsigned long d_time; /* used by d_revalidate */ - const struct dentry_operations *d_op; - struct super_block *d_sb; /* The root of the dentry tree */ - void *d_fsdata; /* fs-specific data */ - - unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; /* @@ -143,7 +145,7 @@ struct dentry_operations { void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); -}; +} ____cacheline_aligned; /* * Locking rules for dentry_operations callbacks are to be found in diff --git a/include/linux/fs.h b/include/linux/fs.h index ea202fff44f8..a04aa96acb71 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -733,6 +733,20 @@ struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) struct inode { + /* RCU path lookup touches following: */ + umode_t i_mode; + uid_t i_uid; + gid_t i_gid; + const struct inode_operations *i_op; + struct super_block *i_sb; + + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + unsigned int i_flags; + struct mutex i_mutex; + + unsigned long i_state; + unsigned long dirtied_when; /* jiffies of first dirtying */ + struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ @@ -744,8 +758,6 @@ struct inode { unsigned long i_ino; atomic_t i_count; unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; dev_t i_rdev; unsigned int i_blkbits; u64 i_version; @@ -758,13 +770,8 @@ struct inode { struct timespec i_ctime; blkcnt_t i_blocks; unsigned short i_bytes; - umode_t i_mode; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct mutex i_mutex; struct rw_semaphore i_alloc_sem; - const struct inode_operations *i_op; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; struct address_space i_data; @@ -785,11 +792,6 @@ struct inode { struct hlist_head i_fsnotify_marks; #endif - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned int i_flags; - #ifdef CONFIG_IMA /* protected by i_lock */ unsigned int i_readcount; /* struct files open RO */ @@ -1549,8 +1551,15 @@ struct file_operations { }; struct inode_operations { - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + void * (*follow_link) (struct dentry *, struct nameidata *); + int (*permission) (struct inode *, int); + int (*check_acl)(struct inode *, int); + + int (*readlink) (struct dentry *, char __user *,int); + void (*put_link) (struct dentry *, struct nameidata *, void *); + + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -1559,12 +1568,7 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1576,7 +1580,7 @@ struct inode_operations { loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); -}; +} ____cacheline_aligned; struct seq_file; -- cgit v1.2.3 From ceb5bdc2d246f6d81cf61ed70f325308a11821d2 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:05 +1100 Subject: fs: dcache per-bucket dcache hash locking We can turn the dcache hash locking from a global dcache_hash_lock into per-bucket locking. Signed-off-by: Nick Piggin --- fs/dcache.c | 133 ++++++++++++++++++++++++++++++------------------- fs/super.c | 3 +- include/linux/dcache.h | 3 +- include/linux/fs.h | 3 +- 4 files changed, 89 insertions(+), 53 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 07d1f6862dc7..9f04e1ba75b7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -33,14 +33,18 @@ #include #include #include +#include +#include #include "internal.h" /* * Usage: * dcache_inode_lock protects: * - i_dentry, d_alias, d_inode - * dcache_hash_lock protects: - * - the dcache hash table, s_anon lists + * dcache_hash_bucket lock protects: + * - the dcache hash table + * s_anon bl list spinlock protects: + * - the s_anon list (see __d_drop) * dcache_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: @@ -57,7 +61,8 @@ * dcache_inode_lock * dentry->d_lock * dcache_lru_lock - * dcache_hash_lock + * dcache_hash_bucket lock + * s_anon lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -74,7 +79,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); @@ -96,7 +100,29 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +struct dcache_hash_bucket { + struct hlist_bl_head head; +}; +static struct dcache_hash_bucket *dentry_hashtable __read_mostly; + +static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + +static inline void spin_lock_bucket(struct dcache_hash_bucket *b) +{ + bit_spin_lock(0, (unsigned long *)&b->head.first); +} + +static inline void spin_unlock_bucket(struct dcache_hash_bucket *b) +{ + __bit_spin_unlock(0, (unsigned long *)&b->head.first); +} /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { @@ -144,7 +170,7 @@ static void d_free(struct dentry *dentry) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_unhashed(&dentry->d_hash)) + if (hlist_bl_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -302,11 +328,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - spin_lock(&dcache_hash_lock); - hlist_del_rcu(&dentry->d_hash); - spin_unlock(&dcache_hash_lock); - dentry_rcuwalk_barrier(dentry); + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + bit_spin_lock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_init(&dentry->d_hash); + __bit_spin_unlock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + } else { + struct dcache_hash_bucket *b; + b = d_hash(dentry->d_parent, dentry->d_name.hash); + spin_lock_bucket(b); + /* + * We may not actually need to put DCACHE_UNHASHED + * manipulations under the hash lock, but follow + * the principle of least surprise. + */ + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_rcu(&dentry->d_hash); + spin_unlock_bucket(b); + dentry_rcuwalk_barrier(dentry); + } } } EXPORT_SYMBOL(__d_drop); @@ -961,8 +1003,8 @@ void shrink_dcache_for_umount(struct super_block *sb) spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); shrink_dcache_for_umount_subtree(dentry); } } @@ -1263,7 +1305,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - INIT_HLIST_NODE(&dentry->d_hash); + INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); @@ -1459,14 +1501,6 @@ struct dentry * d_alloc_root(struct inode * root_inode) } EXPORT_SYMBOL(d_alloc_root); -static inline struct hlist_head *d_hash(struct dentry *parent, - unsigned long hash) -{ - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); -} - /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1521,11 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_sb = inode->i_sb; tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - tmp->d_flags &= ~DCACHE_UNHASHED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock(&dcache_hash_lock); - hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); - spin_unlock(&dcache_hash_lock); + bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + tmp->d_flags &= ~DCACHE_UNHASHED; + hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); + __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_inode_lock); @@ -1567,7 +1601,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) d_move(new, dentry); iput(inode); } else { - /* already taking dcache_inode_lock, so d_add() by hand */ + /* already got dcache_inode_lock, so d_add() by hand */ __d_instantiate(dentry, inode); spin_unlock(&dcache_inode_lock); security_d_instantiate(dentry, inode); @@ -1702,8 +1736,8 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent, hash); - struct hlist_node *node; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *dentry; /* @@ -1726,7 +1760,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * * See Documentation/vfs/dcache-locking.txt for more details. */ - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { struct inode *i; const char *tname; int tlen; @@ -1820,8 +1854,8 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent,hash); - struct hlist_node *node; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; @@ -1847,7 +1881,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ rcu_read_lock(); - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { const char *tname; int tlen; @@ -1998,11 +2032,13 @@ again: } EXPORT_SYMBOL(d_delete); -static void __d_rehash(struct dentry * entry, struct hlist_head *list) +static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) { - + BUG_ON(!d_unhashed(entry)); + spin_lock_bucket(b); entry->d_flags &= ~DCACHE_UNHASHED; - hlist_add_head_rcu(&entry->d_hash, list); + hlist_bl_add_head_rcu(&entry->d_hash, &b->head); + spin_unlock_bucket(b); } static void _d_rehash(struct dentry * entry) @@ -2020,9 +2056,7 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { spin_lock(&entry->d_lock); - spin_lock(&dcache_hash_lock); _d_rehash(entry); - spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); } EXPORT_SYMBOL(d_rehash); @@ -2165,15 +2199,16 @@ void d_move(struct dentry * dentry, struct dentry * target) write_seqcount_begin(&dentry->d_seq); write_seqcount_begin(&target->d_seq); - /* Move the dentry to the target hash queue, if on different bucket */ - spin_lock(&dcache_hash_lock); - if (!d_unhashed(dentry)) - hlist_del_rcu(&dentry->d_hash); + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ + + /* + * Move the dentry to the target hash queue. Don't bother checking + * for the same hash queue because of how unlikely it is. + */ + __d_drop(dentry); __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ - /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ __d_drop(target); list_del(&dentry->d_u.d_child); @@ -2369,9 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) spin_lock(&actual->d_lock); found: - spin_lock(&dcache_hash_lock); _d_rehash(actual); - spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_inode_lock); out_nolock: @@ -2953,7 +2986,7 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, HASH_EARLY, @@ -2962,7 +2995,7 @@ static void __init dcache_init_early(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } static void __init dcache_init(void) @@ -2985,7 +3018,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, 0, @@ -2994,7 +3027,7 @@ static void __init dcache_init(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } /* SLAB cache for __getname() consumers */ diff --git a/fs/super.c b/fs/super.c index ca696155cd9a..968ba013011a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "internal.h" @@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_LIST_HEAD(&s->s_files); #endif INIT_LIST_HEAD(&s->s_instances); - INIT_HLIST_HEAD(&s->s_anon); + INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8b2064d02928..5f0392ef759b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,7 @@ struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_t d_seq; /* per dentry seqlock */ - struct hlist_node d_hash; /* lookup hash list */ + struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is diff --git a/include/linux/fs.h b/include/linux/fs.h index d5a4d42f655b..baf3e556ff0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,6 +392,7 @@ struct inodes_stat_t { #include #include #include +#include #include #include @@ -1377,7 +1378,7 @@ struct super_block { const struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ #ifdef CONFIG_SMP struct list_head __percpu *s_files; #else -- cgit v1.2.3 From 873feea09ebc980cbd3631b767356ce1eee65ec1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:06 +1100 Subject: fs: dcache per-inode inode alias locking dcache_inode_lock can be replaced with per-inode locking. Use existing inode->i_lock for this. This is slightly non-trivial because we sometimes need to find the inode from the dentry, which requires d_inode to be stabilised (either with refcount or d_lock). Signed-off-by: Nick Piggin --- fs/9p/vfs_inode.c | 4 +-- fs/affs/amigaffs.c | 4 +-- fs/cifs/inode.c | 6 ++-- fs/dcache.c | 88 +++++++++++++++++++++++++++----------------------- fs/exportfs/expfs.c | 12 ++++--- fs/nfs/getroot.c | 4 +-- fs/notify/fsnotify.c | 4 +-- fs/ocfs2/dcache.c | 4 +-- include/linux/dcache.h | 1 - 9 files changed, 67 insertions(+), 60 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index df8bbb358d54..59782981b225 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -277,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return dentry; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 600101a21ba2..3a4557e8325c 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); head = &inode->i_dentry; next = head->next; while (next != head) { @@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) } next = next->next; } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2a239d878e85..a853a89857a5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -809,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return true; } } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return false; } diff --git a/fs/dcache.c b/fs/dcache.c index 9f04e1ba75b7..09ec945f3c98 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -39,8 +39,8 @@ /* * Usage: - * dcache_inode_lock protects: - * - i_dentry, d_alias, d_inode + * dcache->d_inode->i_lock protects: + * - i_dentry, d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -58,7 +58,7 @@ * - d_alias, d_inode * * Ordering: - * dcache_inode_lock + * dentry->d_inode->i_lock * dentry->d_lock * dcache_lru_lock * dcache_hash_bucket lock @@ -78,12 +78,10 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); -EXPORT_SYMBOL(dcache_inode_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -196,14 +194,14 @@ static inline void dentry_rcuwalk_barrier(struct dentry *dentry) */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -212,7 +210,6 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); } } @@ -222,14 +219,14 @@ static void dentry_iput(struct dentry * dentry) */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; list_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -295,7 +292,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); @@ -370,9 +367,11 @@ EXPORT_SYMBOL(d_drop); static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) __releases(dentry->d_lock) { + struct inode *inode; struct dentry *parent; - if (!spin_trylock(&dcache_inode_lock)) { + inode = dentry->d_inode; + if (inode && !spin_trylock(&inode->i_lock)) { relock: spin_unlock(&dentry->d_lock); cpu_relax(); @@ -383,7 +382,8 @@ relock: else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); goto relock; } @@ -618,9 +618,9 @@ struct dentry *d_find_alias(struct inode *inode) struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } return de; } @@ -634,20 +634,20 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -1392,9 +1392,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_inode_lock); + if (inode) + spin_lock(&inode->i_lock); __d_instantiate(entry, inode); - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1458,9 +1460,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_inode_lock); + if (inode) + spin_lock(&inode->i_lock); result = __d_instantiate_unique(entry, inode); - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1542,10 +1546,10 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); res = __d_find_alias(inode, 0); if (res) { - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); dput(tmp); goto out_iput; } @@ -1561,7 +1565,7 @@ struct dentry *d_obtain_alias(struct inode *inode) hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return tmp; @@ -1592,18 +1596,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already got dcache_inode_lock, so d_add() by hand */ + /* already taking inode->i_lock, so d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1676,10 +1680,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); return found; } @@ -1690,7 +1694,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); __dget(new); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -2004,15 +2008,17 @@ EXPORT_SYMBOL(d_validate); void d_delete(struct dentry * dentry) { + struct inode *inode; int isdir = 0; /* * Are we the only user? */ again: spin_lock(&dentry->d_lock); - isdir = S_ISDIR(dentry->d_inode->i_mode); + inode = dentry->d_inode; + isdir = S_ISDIR(inode->i_mode); if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_inode_lock)) { + if (inode && !spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); goto again; @@ -2266,13 +2272,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock + * dentry->d_parent->d_inode->i_mutex and the inode->i_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_inode_lock) +static struct dentry *__d_unalias(struct inode *inode, + struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -2298,7 +2304,7 @@ out_unalias: d_move(alias, dentry); ret = alias; out_err: - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2371,7 +2377,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto out_nolock; } - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -2388,7 +2394,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto found; } /* Nope, but we must(!) avoid directory aliasing */ - actual = __d_unalias(dentry, alias); + actual = __d_unalias(inode, dentry, alias); if (IS_ERR(actual)) dput(alias); goto out_nolock; @@ -2406,7 +2412,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index f06a940065f6..4b6825740dd5 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result, void *context) { struct dentry *dentry, *toput = NULL; + struct inode *inode; if (acceptable(context, result)) return result; - spin_lock(&dcache_inode_lock); - list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { + inode = result->d_inode; + spin_lock(&inode->i_lock); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { dget(dentry); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); toput = dentry; } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (toput) dput(toput); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index c3a5a1126833..5596c6a2881e 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -63,11 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&dcache_inode_lock); + spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&sb->s_root->d_inode->i_lock); } return 0; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9be6ec1f36d8..79b47cbb5cd8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ list_for_each_entry(alias, &inode->i_dentry, d_alias) { @@ -82,7 +82,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } spin_unlock(&alias->d_lock); } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } /* Notify this dentry's parent about a child's events. */ diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 0310b16a7238..6d80ecc7834f 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -175,7 +175,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); @@ -193,7 +193,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, dentry = NULL; } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5f0392ef759b..d719e4de8046 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -191,7 +191,6 @@ struct dentry_operations { #define DCACHE_OP_REVALIDATE 0x4000 #define DCACHE_OP_DELETE 0x8000 -extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) -- cgit v1.2.3 From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:07 +1100 Subject: fs: improve scalability of pseudo filesystems Regardless of how much we possibly try to scale dcache, there is likely always going to be some fundamental contention when adding or removing children under the same parent. Pseudo filesystems do not seem need to have connected dentries because by definition they are disconnected. Signed-off-by: Nick Piggin --- fs/anon_inodes.c | 2 +- fs/dcache.c | 12 ++++++++++++ fs/pipe.c | 2 +- include/linux/dcache.h | 1 + net/socket.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index aca8806fa206..9d92b33da8a0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); if (!path.dentry) goto err_module; diff --git a/fs/dcache.c b/fs/dcache.c index 09ec945f3c98..9e6e6db76869 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; diff --git a/fs/pipe.c b/fs/pipe.c index 01a786567810..cfe3a7f2ee21 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d719e4de8046..c0a2ca97c72f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); diff --git a/net/socket.c b/net/socket.c index 991e266bc7ae..0ee74c325320 100644 --- a/net/socket.c +++ b/net/socket.c @@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; -- cgit v1.2.3 From e1bb57826381199cc79fbf44e9dfeee58fc7b339 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:08 +1100 Subject: fs: prefetch inode data in dcache lookup This makes single threaded git diff -1.25% +/- 0.05% elapsed time on my 2s12c24t Westmere system, and -0.86% +/- 0.05% on my 2s8c Barcelona, by prefetching the important first cacheline of the inode in while we do the actual name compare and other operations on the dentry. There was no measurable slowdown in the single file stat case, or the creat case (where negative dentries would be common). Signed-off-by: Nick Piggin --- fs/dcache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9e6e6db76869..2a4ce7dc2306 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1793,6 +1793,9 @@ seqretry: tlen = dentry->d_name.len; tname = dentry->d_name.name; i = dentry->d_inode; + prefetch(tname); + if (i) + prefetch(i); /* * This seqcount check is required to ensure name and * len are loaded atomically, so as not to walk off the -- cgit v1.2.3 From 9d55c369bb5e695e629bc35cba2ef607755b3bee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:09 +1100 Subject: fs: implement faster dentry memcmp The standard memcmp function on a Westmere system shows up hot in profiles in the `git diff` workload (both parallel and single threaded), and it is likely due to the costs associated with trapping into microcode, and little opportunity to improve memory access (dentry name is not likely to take up more than a cacheline). So replace it with an open-coded byte comparison. This increases code size by 8 bytes in the critical __d_lookup_rcu function, but the speedup is huge, averaging 10 runs of each: git diff st user sys elapsed CPU before 1.15 2.57 3.82 97.1 after 1.14 2.35 3.61 96.8 git diff mt user sys elapsed CPU before 1.27 3.85 1.46 349 after 1.26 3.54 1.43 333 Elapsed time for single threaded git diff at 95.0% confidence: -0.21 +/- 0.01 -5.45% +/- 0.24% It's -0.66% +/- 0.06% elapsed time on my Opteron, so rep cmp costs on the fam10h seem to be relatively smaller, but there is still a win. Signed-off-by: Nick Piggin --- fs/dcache.c | 12 +++--------- include/linux/dcache.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2a4ce7dc2306..5699d4c027cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1454,9 +1454,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (alias->d_parent != entry->d_parent) continue; - if (qstr->len != len) - continue; - if (memcmp(qstr->name, name, len)) + if (dentry_cmp(qstr->name, qstr->len, name, len)) continue; __dget(alias); return alias; @@ -1810,9 +1808,7 @@ seqretry: tlen, tname, name)) continue; } else { - if (tlen != len) - continue; - if (memcmp(tname, str, tlen)) + if (dentry_cmp(tname, tlen, str, len)) continue; } /* @@ -1925,9 +1921,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) tlen, tname, name)) goto next; } else { - if (tlen != len) - goto next; - if (memcmp(tname, str, tlen)) + if (dentry_cmp(tname, tlen, str, len)) goto next; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c0a2ca97c72f..bd07758943e0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -47,6 +47,27 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ + int ret; + if (scount != tcount) + return 1; + do { + ret = (*cs != *ct); + if (ret) + break; + cs++; + ct++; + tcount--; + } while (tcount); + return ret; +} + /* Name hashing routines. Initial hash value */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash() 0 -- cgit v1.2.3