From dcfadfa4ec5a12404a99ad6426871a6b03a62b37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 17:27:30 -0400 Subject: new helper: __alloc_fd() Essentially, alloc_fd() in a files_struct we own a reference to. Most of the time wanting to use it is a sign of lousy API design (such as android/binder). It's *not* a general-purpose interface; better that than open-coding its guts, but again, playing with other process' descriptor table is a sign of bad design. Signed-off-by: Al Viro --- include/linux/fdtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/fdtable.h') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 158a41eed314..b84ca064f727 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -125,6 +125,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +extern int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags); + extern struct kmem_cache *files_cachep; #endif /* __LINUX_FDTABLE_H */ -- cgit v1.2.3 From 7cf4dc3c8dbfdfde163d4636f621cf99a1f63bfb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 19:56:12 -0400 Subject: move files_struct-related bits from kernel/exit.c to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/fdtable.h | 6 --- kernel/exit.c | 93 -------------------------------------------- 3 files changed, 99 insertions(+), 100 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/fs/file.c b/fs/file.c index a3a0705f8f51..0be423cadb26 100644 --- a/fs/file.c +++ b/fs/file.c @@ -84,7 +84,7 @@ static void free_fdtable_work(struct work_struct *work) } } -void free_fdtable_rcu(struct rcu_head *rcu) +static void free_fdtable_rcu(struct rcu_head *rcu) { struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); struct fdtable_defer *fddef; @@ -116,6 +116,11 @@ void free_fdtable_rcu(struct rcu_head *rcu) } } +static inline void free_fdtable(struct fdtable *fdt) +{ + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -388,6 +393,99 @@ out: return NULL; } +static void close_files(struct files_struct * files) +{ + int i, j; + struct fdtable *fdt; + + j = 0; + + /* + * It is safe to dereference the fd table without RCU or + * ->file_lock because this is the last reference to the + * files structure. But use RCU to shut RCU-lockdep up. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + rcu_read_unlock(); + for (;;) { + unsigned long set; + i = j * BITS_PER_LONG; + if (i >= fdt->max_fds) + break; + set = fdt->open_fds[j++]; + while (set) { + if (set & 1) { + struct file * file = xchg(&fdt->fd[i], NULL); + if (file) { + filp_close(file, files); + cond_resched(); + } + } + i++; + set >>= 1; + } + } +} + +struct files_struct *get_files_struct(struct task_struct *task) +{ + struct files_struct *files; + + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + + return files; +} + +void put_files_struct(struct files_struct *files) +{ + struct fdtable *fdt; + + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. + * If the fdtable was embedded, pass files for freeing + * at the end of the RCU grace period. Otherwise, + * you can free files immediately. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + if (fdt != &files->fdtab) + kmem_cache_free(files_cachep, files); + free_fdtable(fdt); + rcu_read_unlock(); + } +} + +void reset_files_struct(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} + +void exit_files(struct task_struct *tsk) +{ + struct files_struct * files = tsk->files; + + if (files) { + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); + } +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index b84ca064f727..3855f4febe70 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -94,14 +94,8 @@ struct vfsmount; struct dentry; extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable_rcu(struct rcu_head *rcu); extern void __init files_defer_init(void); -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; diff --git a/kernel/exit.c b/kernel/exit.c index f65345f9e5bb..20dfc7617c2e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -466,99 +466,6 @@ void daemonize(const char *name, ...) EXPORT_SYMBOL(daemonize); -static void close_files(struct files_struct * files) -{ - int i, j; - struct fdtable *fdt; - - j = 0; - - /* - * It is safe to dereference the fd table without RCU or - * ->file_lock because this is the last reference to the - * files structure. But use RCU to shut RCU-lockdep up. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - rcu_read_unlock(); - for (;;) { - unsigned long set; - i = j * BITS_PER_LONG; - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j++]; - while (set) { - if (set & 1) { - struct file * file = xchg(&fdt->fd[i], NULL); - if (file) { - filp_close(file, files); - cond_resched(); - } - } - i++; - set >>= 1; - } - } -} - -struct files_struct *get_files_struct(struct task_struct *task) -{ - struct files_struct *files; - - task_lock(task); - files = task->files; - if (files) - atomic_inc(&files->count); - task_unlock(task); - - return files; -} - -void put_files_struct(struct files_struct *files) -{ - struct fdtable *fdt; - - if (atomic_dec_and_test(&files->count)) { - close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - * If the fdtable was embedded, pass files for freeing - * at the end of the RCU grace period. Otherwise, - * you can free files immediately. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - if (fdt != &files->fdtab) - kmem_cache_free(files_cachep, files); - free_fdtable(fdt); - rcu_read_unlock(); - } -} - -void reset_files_struct(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_struct *old; - - old = tsk->files; - task_lock(tsk); - tsk->files = files; - task_unlock(tsk); - put_files_struct(old); -} - -void exit_files(struct task_struct *tsk) -{ - struct files_struct * files = tsk->files; - - if (files) { - task_lock(tsk); - tsk->files = NULL; - task_unlock(tsk); - put_files_struct(files); - } -} - #ifdef CONFIG_MM_OWNER /* * A task is exiting. If it owned this mm, find a new owner for the mm. -- cgit v1.2.3 From f869e8a7f753e3fd43d6483e796774776f645edb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:06:33 -0400 Subject: expose a low-level variant of fd_install() for binder Similar situation to that of __alloc_fd(); do not use unless you really have to. You should not touch any descriptor table other than your own; it's a sure sign of a really bad API design. As with __alloc_fd(), you *must* use a first-class reference to struct files_struct; something obtained by get_files_struct(some task) (let alone direct task->files) will not do. It must be either current->files, or obtained by get_files_struct(current) by the owner of that sucker and given to you. Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 13 ++----------- fs/file.c | 16 ++++++++++++++-- include/linux/fdtable.h | 2 ++ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 4946d282a35c..9e1a98a360d4 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -386,17 +386,8 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - struct files_struct *files = proc->files; - struct fdtable *fdt; - - if (files == NULL) - return; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + if (proc->files) + __fd_install(proc->files, fd, file); } /* diff --git a/fs/file.c b/fs/file.c index 78cf88f2a0e8..0d1bf0515111 100644 --- a/fs/file.c +++ b/fs/file.c @@ -599,11 +599,18 @@ EXPORT_SYMBOL(put_unused_fd); * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. + * + * NOTE: __fd_install() variant is really, really low-level; don't + * use it unless you are forced to by truly lousy API shoved down + * your throat. 'files' *MUST* be either current->files or obtained + * by get_files_struct(current) done by whoever had given it to you, + * or really bad things will happen. Normally you want to use + * fd_install() instead. */ -void fd_install(unsigned int fd, struct file *file) +void __fd_install(struct files_struct *files, unsigned int fd, + struct file *file) { - struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); @@ -612,4 +619,9 @@ void fd_install(unsigned int fd, struct file *file) spin_unlock(&files->file_lock); } +void fd_install(unsigned int fd, struct file *file) +{ + __fd_install(current->files, fd, file); +} + EXPORT_SYMBOL(fd_install); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 3855f4febe70..59d4fc7f10c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -121,6 +121,8 @@ struct files_struct *dup_fd(struct files_struct *, int *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); +extern void __fd_install(struct files_struct *files, + unsigned int fd, struct file *file); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From 483ce1d4b8c3b82bc9c9a1dd9dbc44f50b3aaf5a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:04:24 -0400 Subject: take descriptor-related part of close() to file.c Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 34 ++-------------------------------- fs/file.c | 26 ++++++++++++++++++++++++++ fs/open.c | 22 +--------------------- include/linux/fdtable.h | 2 ++ 4 files changed, 31 insertions(+), 53 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 9e1a98a360d4..f71d624995ea 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -390,43 +390,17 @@ static void task_fd_install( __fd_install(proc->files, fd, file); } -/* - * copied from __put_unused_fd in open.c - */ -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - /* * copied from sys_close */ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { - struct file *filp; - struct files_struct *files = proc->files; - struct fdtable *fdt; int retval; - if (files == NULL) + if (proc->files == NULL) return -ESRCH; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); - + retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || @@ -435,10 +409,6 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } static void binder_set_nice(long nice) diff --git a/fs/file.c b/fs/file.c index 6eef55ce30c9..fd4694e688ab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -626,6 +626,32 @@ void fd_install(unsigned int fd, struct file *file) EXPORT_SYMBOL(fd_install); +/* + * The same warnings as for __alloc_fd()/__fd_install() apply here... + */ +int __close_fd(struct files_struct *files, unsigned fd) +{ + struct file *file; + struct fdtable *fdt; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + goto out_unlock; + file = fdt->fd[fd]; + if (!file) + goto out_unlock; + rcu_assign_pointer(fdt->fd[fd], NULL); + __clear_close_on_exec(fd, fdt); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + return filp_close(file, files); + +out_unlock: + spin_unlock(&files->file_lock); + return -EBADF; +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/fs/open.c b/fs/open.c index c525bd0e65b6..30760017deed 100644 --- a/fs/open.c +++ b/fs/open.c @@ -994,23 +994,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - struct file * filp; - struct files_struct *files = current->files; - struct fdtable *fdt; - int retval; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); + int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -1020,10 +1004,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } EXPORT_SYMBOL(sys_close); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59d4fc7f10c8..59488f2392bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -123,6 +123,8 @@ extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); extern void __fd_install(struct files_struct *files, unsigned int fd, struct file *file); +extern int __close_fd(struct files_struct *files, + unsigned int fd); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From 6a6d27de340c89c5323565b49f7851362619925d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 09:56:33 -0400 Subject: take close-on-exec logics to fs/file.c, clean it up a bit ... and add cond_resched() there, while we are at it. We can get large latencies as is... Signed-off-by: Al Viro --- fs/exec.c | 41 ++++++----------------------------------- fs/file.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + 3 files changed, 44 insertions(+), 35 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/fs/exec.c b/fs/exec.c index 574cf4de4ec3..f2b6af585d4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1006,40 +1006,6 @@ no_thread_group: return 0; } -/* - * These functions flushes out all traces of the currently running executable - * so that a new one can be started - */ -static void flush_old_files(struct files_struct * files) -{ - long j = -1; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->close_on_exec[j]; - if (!set) - continue; - fdt->close_on_exec[j] = 0; - spin_unlock(&files->file_lock); - for ( ; set ; i++,set >>= 1) { - if (set & 1) { - sys_close(i); - } - } - spin_lock(&files->file_lock); - - } - spin_unlock(&files->file_lock); -} - char *get_task_comm(char *buf, struct task_struct *tsk) { /* buf must be at least sizeof(tsk->comm) in size */ @@ -1050,6 +1016,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) } EXPORT_SYMBOL_GPL(get_task_comm); +/* + * These functions flushes out all traces of the currently running executable + * so that a new one can be started + */ + void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); @@ -1171,7 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->self_exec_id++; flush_signal_handlers(current, 0); - flush_old_files(current->files); + do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/file.c b/fs/file.c index fd4694e688ab..92197dd9fdc8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -652,6 +652,43 @@ out_unlock: return -EBADF; } +void do_close_on_exec(struct files_struct *files) +{ + unsigned i; + struct fdtable *fdt; + + /* exec unshares first */ + BUG_ON(atomic_read(&files->count) != 1); + spin_lock(&files->file_lock); + for (i = 0; ; i++) { + unsigned long set; + unsigned fd = i * BITS_PER_LONG; + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + break; + set = fdt->close_on_exec[i]; + if (!set) + continue; + fdt->close_on_exec[i] = 0; + for ( ; set ; fd++, set >>= 1) { + struct file *file; + if (!(set & 1)) + continue; + file = fdt->fd[fd]; + if (!file) + continue; + rcu_assign_pointer(fdt->fd[fd], NULL); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + filp_close(file, files); + cond_resched(); + spin_lock(&files->file_lock); + } + + } + spin_unlock(&files->file_lock); +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59488f2392bc..ef4b2137e6bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -118,6 +118,7 @@ void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +void do_close_on_exec(struct files_struct *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); -- cgit v1.2.3 From b8318b01a8f7f760ae3ecae052ccc7fc123d9508 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:09:42 -0400 Subject: take __{set,clear}_{open_fd,close_on_exec}() into fs/file.c nobody uses those outside anymore. Signed-off-by: Al Viro --- fs/file.c | 20 ++++++++++++++++++++ include/linux/fdtable.h | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/fs/file.c b/fs/file.c index a7bbe0324478..40ddef9fe041 100644 --- a/fs/file.c +++ b/fs/file.c @@ -256,6 +256,26 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static inline void __set_close_on_exec(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->close_on_exec); +} + +static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->close_on_exec); +} + +static inline void __set_open_fd(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->open_fds); +} + +static inline void __clear_open_fd(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->open_fds); +} + static int count_open_files(struct fdtable *fdt) { int size = fdt->max_fds; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index ef4b2137e6bc..9ff26319d44f 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -30,31 +30,11 @@ struct fdtable { struct fdtable *next; }; -static inline void __set_close_on_exec(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->close_on_exec); -} - -static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->close_on_exec); -} - static inline bool close_on_exec(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->close_on_exec); } -static inline void __set_open_fd(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->open_fds); -} - -static inline void __clear_open_fd(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->open_fds); -} - static inline bool fd_is_open(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->open_fds); -- cgit v1.2.3 From ad47bd7252bf402fe7dba92f5240b5ed16832ae7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:11:34 -0400 Subject: make expand_files() and alloc_fd() static no callers outside of fs/file.c left Signed-off-by: Al Viro --- fs/file.c | 4 ++-- include/linux/fdtable.h | 1 - include/linux/file.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/fs/file.c b/fs/file.c index 40ddef9fe041..967bd0dadbe5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -238,7 +238,7 @@ static int expand_fdtable(struct files_struct *files, int nr) * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ -int expand_files(struct files_struct *files, int nr) +static int expand_files(struct files_struct *files, int nr) { struct fdtable *fdt; @@ -580,7 +580,7 @@ out: return error; } -int alloc_fd(unsigned start, unsigned flags) +static int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 9ff26319d44f..de2b71caa0f0 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -73,7 +73,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern int expand_files(struct files_struct *, int nr); extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) diff --git a/include/linux/file.h b/include/linux/file.h index 6239591a6122..6eee54aea279 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -35,7 +35,6 @@ extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); -extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd_flags(unsigned flags); #define get_unused_fd() get_unused_fd_flags(0) extern void put_unused_fd(unsigned int fd); -- cgit v1.2.3 From c3c073f808b22dfae15ef8412b6f7b998644139a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:32:06 -0400 Subject: new helper: iterate_fd() iterates through the opened files in given descriptor table, calling a supplied function; we stop once non-zero is returned. Callback gets struct file *, descriptor number and const void * argument passed to iterator. It is called with files->file_lock held, so it is not allowed to block. tty_io, netprio_cgroup and selinux flush_unauthorized_files() converted to its use. Signed-off-by: Al Viro --- drivers/tty/tty_io.c | 36 +++++++++++------------------- fs/file.c | 21 +++++++++++++++++ include/linux/fdtable.h | 3 +++ net/core/netprio_cgroup.c | 38 ++++++++++--------------------- security/selinux/hooks.c | 57 ++++++++++++++++++----------------------------- 5 files changed, 71 insertions(+), 84 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index b425c79675ad..71d95cfbabec 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2791,6 +2791,13 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, } #endif +static int this_tty(const void *t, struct file *file, unsigned fd) +{ + if (likely(file->f_op->read != tty_read)) + return 0; + return file_tty(file) != t ? 0 : fd + 1; +} + /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this @@ -2818,8 +2825,6 @@ void __do_SAK(struct tty_struct *tty) struct task_struct *g, *p; struct pid *session; int i; - struct file *filp; - struct fdtable *fdt; if (!tty) return; @@ -2849,27 +2854,12 @@ void __do_SAK(struct tty_struct *tty) continue; } task_lock(p); - if (p->files) { - /* - * We don't take a ref to the file, so we must - * hold ->file_lock instead. - */ - spin_lock(&p->files->file_lock); - fdt = files_fdtable(p->files); - for (i = 0; i < fdt->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (!filp) - continue; - if (filp->f_op->read == tty_read && - file_tty(filp) == tty) { - printk(KERN_NOTICE "SAK: killed process %d" - " (%s): fd#%d opened to the tty\n", - task_pid_nr(p), p->comm, i); - force_sig(SIGKILL, p); - break; - } - } - spin_unlock(&p->files->file_lock); + i = iterate_fd(p->files, 0, this_tty, tty); + if (i != 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): fd#%d opened to the tty\n", + task_pid_nr(p), p->comm, i - 1); + force_sig(SIGKILL, p); } task_unlock(p); } while_each_thread(g, p); diff --git a/fs/file.c b/fs/file.c index 967bd0dadbe5..e6e418122587 100644 --- a/fs/file.c +++ b/fs/file.c @@ -979,3 +979,24 @@ int f_dupfd(unsigned int from, struct file *file, unsigned flags) } return err; } + +int iterate_fd(struct files_struct *files, unsigned n, + int (*f)(const void *, struct file *, unsigned), + const void *p) +{ + struct fdtable *fdt; + struct file *file; + int res = 0; + if (!files) + return 0; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + while (!res && n < fdt->max_fds) { + file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); + if (file) + res = f(p, file, n); + } + spin_unlock(&files->file_lock); + return res; +} +EXPORT_SYMBOL(iterate_fd); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index de2b71caa0f0..fb7dacae0522 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,6 +98,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); +int iterate_fd(struct files_struct *, unsigned, + int (*)(const void *, struct file *, unsigned), + const void *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..5ffd084c6a83 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -272,38 +272,24 @@ out_free_devname: return ret; } +static int update_netprio(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + return 0; +} + void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; + void *v; cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - struct file *file; - struct socket *sock; - int err; - - file = fcheck_files(files, fd); - if (!file) - continue; - - sock = sock_from_file(file, &err); - if (sock) - sock_update_netprioidx(sock->sk, p); - } - spin_unlock(&files->file_lock); + v = (void *)(unsigned long)task_netprioidx(p); + iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 00b50113642d..4dfbcea10eb7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2088,15 +2088,19 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) return (atsecure || cap_bprm_secureexec(bprm)); } +static int match_file(const void *p, struct file *file, unsigned fd) +{ + return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0; +} + /* Derived from fs/exec.c:flush_old_files. */ static inline void flush_unauthorized_files(const struct cred *cred, struct files_struct *files) { struct file *file, *devnull = NULL; struct tty_struct *tty; - struct fdtable *fdt; - long j = -1; int drop_tty = 0; + unsigned n; tty = get_current_tty(); if (tty) { @@ -2123,41 +2127,24 @@ static inline void flush_unauthorized_files(const struct cred *cred, no_tty(); /* Revalidate access to inherited open files. */ - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j]; - if (!set) - continue; - spin_unlock(&files->file_lock); - for ( ; set ; i++, set >>= 1) { - if (!(set & 1)) - continue; - file = fget(i); - if (!file) - continue; - if (file_has_perm(cred, file, file_to_av(file))) { - if (devnull) { - get_file(devnull); - } else { - devnull = dentry_open(&selinux_null, - O_RDWR, cred); - if (IS_ERR(devnull)) - devnull = NULL; - } - replace_fd(i, devnull, 0); - } - fput(file); - } - spin_lock(&files->file_lock); + n = iterate_fd(files, 0, match_file, cred); + if (!n) /* none found? */ + return; + devnull = dentry_open(&selinux_null, O_RDWR, cred); + if (!IS_ERR(devnull)) { + /* replace all the matching ones with this */ + do { + get_file(devnull); + replace_fd(n - 1, devnull, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); + fput(devnull); + } else { + /* just close all the matching ones */ + do { + replace_fd(n - 1, NULL, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); } - spin_unlock(&files->file_lock); } /* -- cgit v1.2.3 From 864bdb3b6cbd9911222543fef1cfe36f88183f44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 Aug 2012 18:42:10 -0400 Subject: new helper: daemonize_descriptors() descriptor-related parts of daemonize, done right. As the result we simplify the locking rules for ->files - we hold task_lock in *all* cases when we modify ->files. Signed-off-by: Al Viro --- fs/file.c | 6 ++++++ include/linux/fdtable.h | 1 + kernel/exit.c | 4 +--- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux/fdtable.h') diff --git a/fs/file.c b/fs/file.c index e6e418122587..15750b80e3ce 100644 --- a/fs/file.c +++ b/fs/file.c @@ -519,6 +519,12 @@ struct files_struct init_files = { .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; +void daemonize_descriptors(void) +{ + atomic_inc(&init_files.count); + reset_files_struct(&init_files); +} + /* * allocate a file descriptor, mark it busy. */ diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fb7dacae0522..45052aa814c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -95,6 +95,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); +void daemonize_descriptors(void); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); diff --git a/kernel/exit.c b/kernel/exit.c index 20dfc7617c2e..095113321318 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -457,9 +457,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ daemonize_fs_struct(); - exit_files(current); - current->files = init_task.files; - atomic_inc(¤t->files->count); + daemonize_descriptors(); reparent_to_kthreadd(); } -- cgit v1.2.3