From f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Feb 2014 09:35:45 -0500 Subject: smarter propagate_mnt() The current mainline has copies propagated to *all* nodes, then tears down the copies we made for nodes that do not contain counterparts of the desired mountpoint. That sets the right propagation graph for the copies (at teardown time we move the slaves of removed node to a surviving peer or directly to master), but we end up paying a fairly steep price in useless allocations. It's fairly easy to create a situation where N calls of mount(2) create exactly N bindings, with O(N^2) vfsmounts allocated and freed in process. Fortunately, it is possible to avoid those allocations/freeings. The trick is to create copies in the right order and find which one would've eventually become a master with the current algorithm. It turns out to be possible in O(nodes getting propagation) time and with no extra allocations at all. One part is that we need to make sure that eventual master will be created before its slaves, so we need to walk the propagation tree in a different order - by peer groups. And iterate through the peers before dealing with the next group. Another thing is finding the (earlier) copy that will be a master of one we are about to create; to do that we are (temporary) marking the masters of mountpoints we are attaching the copies to. Either we are in a peer of the last mountpoint we'd dealt with, or we have the following situation: we are attaching to mountpoint M, the last copy S_0 had been attached to M_0 and there are sequences S_0...S_n, M_0...M_n such that S_{i+1} is a master of S_{i}, S_{i} mounted on M{i} and we need to create a slave of the first S_{k} such that M is getting propagation from M_{k}. It means that the master of M_{k} will be among the sequence of masters of M. On the other hand, the nearest marked node in that sequence will either be the master of M_{k} or the master of M_{k-1} (the latter - in the case if M_{k-1} is a slave of something M gets propagation from, but in a wrong peer group). So we go through the sequence of masters of M until we find a marked one (P). Let N be the one before it. Then we go through the sequence of masters of S_0 until we find one (say, S) mounted on a node D that has P as master and check if D is a peer of N. If it is, S will be the master of new copy, if not - the master of S will be. That's it for the hard part; the rest is fairly simple. Iterator is in next_group(), handling of one prospective mountpoint is propagate_one(). It seems to survive all tests and gives a noticably better performance than the current mainline for setups that are seriously using shared subtrees. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/mount.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 371d346fa270..839bac270904 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -44,6 +44,8 @@ struct mnt_namespace; #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 @@ -51,6 +53,7 @@ struct mnt_namespace; #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 +#define MNT_MARKED 0x4000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3 From e25115786ee540fc428a14872ebd4f56252aba32 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 20:41:36 -0500 Subject: switch nbd to sockfd_lookup/sockfd_put Signed-off-by: Al Viro --- drivers/block/nbd.c | 48 +++++++++++++++++++----------------------------- include/linux/nbd.h | 3 +-- 2 files changed, 20 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 55298db36b2d..3a70ea2f7cd6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -630,37 +630,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, } case NBD_CLEAR_SOCK: { - struct file *file; - + struct socket *sock = nbd->sock; nbd->sock = NULL; - file = nbd->file; - nbd->file = NULL; nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); kill_bdev(bdev); - if (file) - fput(file); + if (sock) + sockfd_put(sock); return 0; } case NBD_SET_SOCK: { - struct file *file; - if (nbd->file) + struct socket *sock; + int err; + if (nbd->sock) return -EBUSY; - file = fget(arg); - if (file) { - struct inode *inode = file_inode(file); - if (S_ISSOCK(inode->i_mode)) { - nbd->file = file; - nbd->sock = SOCKET_I(inode); - if (max_part > 0) - bdev->bd_invalidated = 1; - nbd->disconnect = 0; /* we're connected now */ - return 0; - } else { - fput(file); - } + sock = sockfd_lookup(arg, &err); + if (sock) { + nbd->sock = sock; + if (max_part > 0) + bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ + return 0; } return -EINVAL; } @@ -697,12 +689,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_DO_IT: { struct task_struct *thread; - struct file *file; + struct socket *sock; int error; if (nbd->pid) return -EBUSY; - if (!nbd->file) + if (!nbd->sock) return -EINVAL; mutex_unlock(&nbd->tx_lock); @@ -731,15 +723,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (error) return error; sock_shutdown(nbd, 0); - file = nbd->file; - nbd->file = NULL; + sock = nbd->sock; + nbd->sock = NULL; nbd_clear_que(nbd); dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); kill_bdev(bdev); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); set_device_ro(bdev, false); - if (file) - fput(file); + if (sock) + sockfd_put(sock); nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; @@ -875,9 +867,7 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].file = NULL; nbd_dev[i].magic = NBD_MAGIC; - nbd_dev[i].flags = 0; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); diff --git a/include/linux/nbd.h b/include/linux/nbd.h index ae4981ebd18e..f62f78aef4ac 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -24,8 +24,7 @@ struct request; struct nbd_device { int flags; int harderror; /* Code of hard error */ - struct socket * sock; - struct file * file; /* If == NULL, device is not ready, yet */ + struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; spinlock_t queue_lock; -- cgit v1.2.3 From 4597e695b8baa3e2620da89c7593be70cf20566b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:06:32 -0400 Subject: get rid of DEBUG_WRITECOUNT it only makes control flow in __fput() and friends more convoluted. Signed-off-by: Al Viro --- arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/s390/configs/default_defconfig | 1 - arch/sh/configs/rsk7203_defconfig | 1 - arch/xtensa/configs/iss_defconfig | 1 - arch/xtensa/configs/s6105_defconfig | 1 - fs/file_table.c | 5 ---- fs/open.c | 8 ------ include/linux/fs.h | 49 ----------------------------------- lib/Kconfig.debug | 10 ------- 10 files changed, 78 deletions(-) (limited to 'include') diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c2353bf059fd..175a8b99c196 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_HIGHMEM=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 139a8308070c..fdee37fab81c 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index e0af2ee58751..3f538468a86e 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -550,7 +550,6 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 4e5229b0c5bb..47236573db83 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_FRAME_POINTER=y diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 4f233204faf9..711f8aa14743 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index d929f77a0360..78318a76fa16 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set CONFIG_DEBUG_NOMMU_REGIONS=y -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/fs/file_table.c b/fs/file_table.c index 79ecae62209a..ee20658a0647 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { percpu_counter_dec(&nr_files); - file_check_state(f); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -186,7 +185,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, * that we can do debugging checks at __fput() */ if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - file_take_write(file); WARN_ON(mnt_clone_write(path->mnt)); } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) @@ -213,10 +211,7 @@ static void drop_file_write_access(struct file *file) return; put_write_access(inode); - if (file_check_writeable(file) != 0) - return; __mnt_drop_write(mnt); - file_release_write(file); } /* the real guts of fput() - releasing the last reference to file diff --git a/fs/open.c b/fs/open.c index 2ed7325f713e..8d0b6adfe7b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -683,7 +683,6 @@ static int do_dentry_open(struct file *f, error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -731,14 +730,7 @@ cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) { if (!special_file(inode->i_mode)) { - /* - * We don't consider this a real - * mnt_want/drop_write() pair - * because it all happenend right - * here, so just reset the state. - */ put_write_access(inode); - file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e80659ed78fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,9 +769,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - struct file { union { struct llist_node fu_llist; @@ -809,9 +806,6 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -829,49 +823,6 @@ static inline struct file *get_file(struct file *f) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a48abeac753f..7a0859314bdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1030,16 +1030,6 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" - depends on DEBUG_KERNEL - help - Enable this to catch wrong use of the writers count in struct - vfsmount. This will increase the size of each file struct by - 32 bits. - - If unsure, say N. - config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL -- cgit v1.2.3 From 83f936c75e3689a63253d89c47a4d239c56d7410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:02:47 -0400 Subject: mark struct file that had write access grabbed by open() new flag in ->f_mode - FMODE_WRITER. Set by do_dentry_open() in case when it has grabbed write access, checked by __fput() to decide whether it wants to drop the sucker. Allows to stop bothering with mnt_clone_write() in alloc_file(), along with fewer special_file() checks. Signed-off-by: Al Viro --- fs/file_table.c | 37 ++++--------------------------------- fs/namespace.c | 4 +--- fs/open.c | 9 ++++----- include/linux/fs.h | 2 ++ 4 files changed, 11 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index ee20658a0647..ce1504fec5a1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -177,43 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; - - /* - * These mounts don't really matter in practice - * for r/o bind mounts. They aren't userspace- - * visible. We do this for consistency, and so - * that we can do debugging checks at __fput() - */ - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - WARN_ON(mnt_clone_write(path->mnt)); - } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } EXPORT_SYMBOL(alloc_file); -/** - * drop_file_write_access - give up ability to write to a file - * @file: the file to which we will stop writing - * - * This is a central place which will give up the ability - * to write to @file, along with access to write through - * its vfsmount. - */ -static void drop_file_write_access(struct file *file) -{ - struct vfsmount *mnt = file->f_path.mnt; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - - if (special_file(inode->i_mode)) - return; - - put_write_access(inode); - __mnt_drop_write(mnt); -} - /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) @@ -248,8 +217,10 @@ static void __fput(struct file *file) put_pid(file->f_owner.pid); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); - if (file->f_mode & FMODE_WRITE) - drop_file_write_access(file); + if (file->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(mnt); + } file->f_path.dentry = NULL; file->f_path.mnt = NULL; file->f_inode = NULL; diff --git a/fs/namespace.c b/fs/namespace.c index a66aff5bd3fe..20e8696c31a7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int __mnt_want_write_file(struct file *file) { - struct inode *inode = file_inode(file); - - if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) + if (!(file->f_mode & FMODE_WRITER)) return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); diff --git a/fs/open.c b/fs/open.c index ebef0c5fa10c..dcefb2f02d10 100644 --- a/fs/open.c +++ b/fs/open.c @@ -670,6 +670,7 @@ static int do_dentry_open(struct file *f, put_write_access(inode); goto cleanup_file; } + f->f_mode |= FMODE_WRITER; } f->f_mapping = inode->i_mapping; @@ -715,11 +716,9 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - if (f->f_mode & FMODE_WRITE) { - if (!special_file(inode->i_mode)) { - put_write_access(inode); - __mnt_drop_write(f->f_path.mnt); - } + if (f->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); diff --git a/include/linux/fs.h b/include/linux/fs.h index e80659ed78fc..d9d88a0b456e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,6 +125,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +/* Write access to underlying fs */ +#define FMODE_WRITER ((__force fmode_t)0x10000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3 From 7f4b36f9bb930b3b2105a9a2cb0121fa7028c432 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:45:29 -0400 Subject: get rid of files_defer_init() the only thing it's doing these days is calculation of upper limit for fs.nr_open sysctl and that can be done statically Signed-off-by: Al Viro --- fs/file.c | 11 ++++------- fs/file_table.c | 1 - include/linux/fdtable.h | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/file.c b/fs/file.c index eb56a13dab3e..682103b95f8f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -25,7 +25,10 @@ int sysctl_nr_open __read_mostly = 1024*1024; int sysctl_nr_open_min = BITS_PER_LONG; -int sysctl_nr_open_max = 1024 * 1024; /* raised later */ +/* our max() is unusable in constant expressions ;-/ */ +#define __const_max(x, y) ((x) < (y) ? (x) : (y)) +int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; static void *alloc_fdmem(size_t size) { @@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk) } } -void __init files_defer_init(void) -{ - sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & - -BITS_PER_LONG; -} - struct files_struct init_files = { .count = ATOMIC_INIT(1), .fdt = &init_files.fdtab, diff --git a/fs/file_table.c b/fs/file_table.c index ce1504fec5a1..718e8e5224f8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -325,6 +325,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - files_defer_init(); percpu_counter_init(&nr_files, 0); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 70e8e21c0a30..230f87bdf5ad 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -63,8 +63,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern void __init files_defer_init(void); - #define rcu_dereference_check_fdtable(files, fdtfd) \ rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) -- cgit v1.2.3 From 5d826c847b34de6415b4f1becd88a57ff619af50 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 13:42:45 -0400 Subject: new helper: readlink_copy() Signed-off-by: Al Viro --- fs/namei.c | 13 +++++-------- fs/proc/namespaces.c | 14 ++++---------- fs/proc/self.c | 2 +- fs/xfs/xfs_ioctl.c | 28 +--------------------------- include/linux/fs.h | 2 +- 5 files changed, 12 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 617de9e9967b..4fb52f0ca5cb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4297,11 +4297,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); } -int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) +int readlink_copy(char __user *buffer, int buflen, const char *link) { - int len; - - len = PTR_ERR(link); + int len = PTR_ERR(link); if (IS_ERR(link)) goto out; @@ -4313,7 +4311,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c out: return len; } -EXPORT_SYMBOL(vfs_readlink); +EXPORT_SYMBOL(readlink_copy); /* * A helper for ->readlink(). This should be used *ONLY* for symlinks that @@ -4331,7 +4329,7 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(cookie)) return PTR_ERR(cookie); - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + res = readlink_copy(buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) dentry->d_inode->i_op->put_link(dentry, &nd, cookie); return res; @@ -4356,8 +4354,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct page *page = NULL; - char *s = page_getlink(dentry, &page); - int res = vfs_readlink(dentry,buffer,buflen,s); + int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); if (page) { kunmap(page); page_cache_release(page); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 9ae46b87470d..89026095f2b5 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct task_struct *task; void *ns; char name[50]; - int len = -EACCES; + int res = -EACCES; task = get_proc_task(inode); if (!task) @@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - len = -ENOENT; + res = -ENOENT; ns = ns_ops->get(task); if (!ns) goto out_put_task; snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); - len = strlen(name); - - if (len > buflen) - len = buflen; - if (copy_to_user(buffer, name, len)) - len = -EFAULT; - + res = readlink_copy(buffer, buflen, name); ns_ops->put(ns); out_put_task: put_task_struct(task); out: - return len; + return res; } static const struct inode_operations proc_ns_link_inode_operations = { diff --git a/fs/proc/self.c b/fs/proc/self.c index ffeb202ec942..4348bb8907c2 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, if (!tgid) return -ENOENT; sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); + return readlink_copy(buffer, buflen, tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bcfe61202115..0b18776b075e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -271,32 +271,6 @@ xfs_open_by_handle( return error; } -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - int xfs_readlink_by_handle( struct file *parfilp, @@ -334,7 +308,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); + error = readlink_copy(hreq->ohandle, olen, link); if (error) goto out_kfree; diff --git a/include/linux/fs.h b/include/linux/fs.h index d9d88a0b456e..db181b542db1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2517,7 +2517,7 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int vfs_readlink(struct dentry *, char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); -- cgit v1.2.3 From fbb32750a62df75d1ffea547f3908b21c5496d9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:09:54 -0500 Subject: pipe: kill ->map() and ->unmap() all pipe_buffer_operations have the same instances of those... Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 4 +-- fs/fuse/dev.c | 6 ++-- fs/pipe.c | 70 ++++++++++--------------------------------- fs/splice.c | 24 +++++---------- include/linux/pipe_fs_i.h | 19 ------------ kernel/relay.c | 2 -- kernel/trace/trace.c | 4 --- 7 files changed, 29 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6928d094451d..60aafb8a1f2e 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -901,9 +901,9 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - src = buf->ops->map(pipe, buf, 1); + src = kmap_atomic(buf->page); memcpy(page_address(page) + offset, src + buf->offset, len); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f90af6fe6884..aac71ce373e4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -667,7 +667,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) struct pipe_buffer *buf = cs->currbuf; if (!cs->write) { - buf->ops->unmap(cs->pipe, buf, cs->mapaddr); + kunmap_atomic(cs->mapaddr); } else { kunmap_atomic(cs->mapaddr); buf->len = PAGE_SIZE - cs->len; @@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) BUG_ON(!cs->nr_segs); cs->currbuf = buf; - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->mapaddr = kmap_atomic(buf->page); cs->len = buf->len; cs->buf = cs->mapaddr + buf->offset; cs->pipebufs++; @@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) out_fallback_unlock: unlock_page(newpage); out_fallback: - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->mapaddr = kmap_atomic(buf->page); cs->buf = cs->mapaddr + buf->offset; err = lock_request(cs->fc, cs->req); diff --git a/fs/pipe.c b/fs/pipe.c index 78fd0d0788db..6679c95eb4c3 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -225,52 +225,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } -/** - * generic_pipe_buf_map - virtually map a pipe buffer - * @pipe: the pipe that the buffer belongs to - * @buf: the buffer that should be mapped - * @atomic: whether to use an atomic map - * - * Description: - * This function returns a kernel virtual address mapping for the - * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided - * and the caller has to be careful not to fault before calling - * the unmap function. - * - * Note that this function calls kmap_atomic() if @atomic != 0. - */ -void *generic_pipe_buf_map(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, int atomic) -{ - if (atomic) { - buf->flags |= PIPE_BUF_FLAG_ATOMIC; - return kmap_atomic(buf->page); - } - - return kmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_map); - -/** - * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer - * @pipe: the pipe that the buffer belongs to - * @buf: the buffer that should be unmapped - * @map_data: the data that the mapping function returned - * - * Description: - * This function undoes the mapping that ->map() provided. - */ -void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, void *map_data) -{ - if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { - buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; - kunmap_atomic(map_data); - } else - kunmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_unmap); - /** * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to @@ -351,8 +305,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -361,8 +313,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { static const struct pipe_buf_operations packet_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -410,9 +360,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, atomic = !iov_fault_in_pages_write(iov, chars); redo: - addr = ops->map(pipe, buf, atomic); + if (atomic) + addr = kmap_atomic(buf->page); + else + addr = kmap(buf->page); error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); - ops->unmap(pipe, buf, addr); + if (atomic) + kunmap_atomic(addr); + else + kunmap(buf->page); if (unlikely(error)) { /* * Just retry with the slow path if we failed. @@ -538,10 +494,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, iov_fault_in_pages_read(iov, chars); redo1: - addr = ops->map(pipe, buf, atomic); + if (atomic) + addr = kmap_atomic(buf->page); + else + addr = kmap(buf->page); error = pipe_iov_copy_from_user(offset + addr, iov, chars, atomic); - ops->unmap(pipe, buf, addr); + if (atomic) + kunmap_atomic(addr); + else + kunmap(buf->page); ret = error; do_wakeup = 1; if (error) { diff --git a/fs/splice.c b/fs/splice.c index 12028fa41def..ca3bfbd970f3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -136,8 +136,6 @@ error: const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, @@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, static const struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, @@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read); static const struct pipe_buf_operations default_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_nosteal, @@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto out; if (buf->page != page) { - char *src = buf->ops->map(pipe, buf, 1); + char *src = kmap_atomic(buf->page); char *dst = kmap_atomic(page); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); } ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, page, fsdata); @@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, void *data; loff_t tmp = sd->pos; - data = buf->ops->map(pipe, buf, 0); + data = kmap(buf->page); ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); - buf->ops->unmap(pipe, buf, data); + kunmap(buf->page); return ret; } @@ -1536,10 +1528,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, * pages and doing an atomic copy */ if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { - src = buf->ops->map(pipe, buf, 1); + src = kmap_atomic(buf->page); ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, sd->len); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); if (!ret) { ret = sd->len; goto out; @@ -1549,13 +1541,13 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, /* * No dice, use slow non-atomic map and copy */ - src = buf->ops->map(pipe, buf, 0); + src = kmap(buf->page); ret = sd->len; if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) ret = -EFAULT; - buf->ops->unmap(pipe, buf, src); + kunmap(buf->page); out: if (ret > 0) sd->u.userptr += ret; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ab5752692113..6dffcebe6105 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -82,23 +82,6 @@ struct pipe_buf_operations { */ int can_merge; - /* - * ->map() returns a virtual address mapping of the pipe buffer. - * The last integer flag reflects whether this should be an atomic - * mapping or not. The atomic map is faster, however you can't take - * page faults before calling ->unmap() again. So if you need to eg - * access user data through copy_to/from_user(), then you must get - * a non-atomic map. ->map() uses the kmap_atomic slot for - * atomic maps, you have to be careful if mapping another page as - * source or destination for a copy. - */ - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); - - /* - * Undoes ->map(), finishes the virtual mapping of the pipe buffer. - */ - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); - /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong @@ -150,8 +133,6 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); -void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/kernel/relay.c b/kernel/relay.c index 5001c9887db1..98833f664fb6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1195,8 +1195,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, static const struct pipe_buf_operations relay_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = relay_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 24c1f2382557..7511de35257f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4316,8 +4316,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, static const struct pipe_buf_operations tracing_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -5194,8 +5192,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, -- cgit v1.2.3 From c186afb4dbd0050a537b96c7fbee2dba3b57fc38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:16:54 -0500 Subject: switch ->is_partially_uptodate() to saner arguments Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- fs/buffer.c | 6 +++--- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- mm/filemap.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 5b0c083d7c0e..bb2534bc0b03 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -198,7 +198,7 @@ prototypes: unsigned long *); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); + int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c53784c119c8..419e7348c481 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -580,7 +580,7 @@ struct address_space_operations { /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page) (struct mapping *mapping, struct page *page); diff --git a/fs/buffer.c b/fs/buffer.c index 27265a8b43c1..027ae3bdfbbd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end); * Returns true if all buffers which correspond to a file portion * we want to read are uptodate. */ -int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, - unsigned long from) +int block_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count) { unsigned block_start, block_end, blocksize; unsigned to; @@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, head = page_buffers(page); blocksize = head->b_size; - to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = min_t(unsigned, PAGE_CACHE_SIZE - from, count); to = from + to; if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) return 0; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d77797a52b7b..c40302f909ce 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -210,8 +210,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, int block_write_full_page_endio(struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); -int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, - unsigned long from); +int block_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, diff --git a/include/linux/fs.h b/include/linux/fs.h index db181b542db1..ddfff2ecef0b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -385,7 +385,7 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); diff --git a/mm/filemap.c b/mm/filemap.c index 7a13f6ac5421..46e98019af6c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1148,7 +1148,7 @@ find_page: if (!page->mapping) goto page_not_up_to_date_locked; if (!mapping->a_ops->is_partially_uptodate(page, - desc, offset)) + offset, desc->count)) goto page_not_up_to_date_locked; unlock_page(page); } -- cgit v1.2.3 From 9223687863ffa63fa655f52ef64148ee08dee4d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Nov 2013 16:29:46 -0800 Subject: iov_iter: Move iov_iter to uio.h Signed-off-by: Kent Overstreet --- include/linux/fs.h | 32 -------------------------------- include/linux/uio.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ddfff2ecef0b..2261ac8f0534 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -295,38 +295,6 @@ struct page; struct address_space; struct writeback_control; -struct iov_iter { - const struct iovec *iov; - unsigned long nr_segs; - size_t iov_offset; - size_t count; -}; - -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -void iov_iter_advance(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -size_t iov_iter_single_seg_count(const struct iov_iter *i); - -static inline void iov_iter_init(struct iov_iter *i, - const struct iovec *iov, unsigned long nr_segs, - size_t count, size_t written) -{ - i->iov = iov; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count + written; - - iov_iter_advance(i, written); -} - -static inline size_t iov_iter_count(struct iov_iter *i) -{ - return i->count; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet diff --git a/include/linux/uio.h b/include/linux/uio.h index c55ce243cc09..347d70ce098e 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -9,14 +9,23 @@ #ifndef __LINUX_UIO_H #define __LINUX_UIO_H +#include #include +struct page; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; }; +struct iov_iter { + const struct iovec *iov; + unsigned long nr_segs; + size_t iov_offset; + size_t count; +}; + /* * Total number of bytes covered by an iovec. * @@ -34,8 +43,49 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } +static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) +{ + return (struct iovec) { + .iov_base = iter->iov->iov_base + iter->iov_offset, + .iov_len = min(iter->count, + iter->iov->iov_len - iter->iov_offset), + }; +} + +#define iov_for_each(iov, iter, start) \ + for (iter = (start); \ + (iter).count && \ + ((iov = iov_iter_iovec(&(iter))), 1); \ + iov_iter_advance(&(iter), (iov).iov_len)) + unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +void iov_iter_advance(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +size_t iov_iter_single_seg_count(const struct iov_iter *i); + +static inline void iov_iter_init(struct iov_iter *i, + const struct iovec *iov, unsigned long nr_segs, + size_t count, size_t written) +{ + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count + written; + + iov_iter_advance(i, written); +} + +static inline size_t iov_iter_count(struct iov_iter *i) +{ + return i->count; +} + int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); + #endif -- cgit v1.2.3 From 6e58e79db8a16222b31fc8da1ca2ac2dccfc4237 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Feb 2014 17:07:03 -0500 Subject: introduce copy_page_to_iter, kill loop over iovec in generic_file_aio_read() generic_file_aio_read() was looping over the target iovec, with loop over (source) pages nested inside that. Just set an iov_iter up and pass *that* to do_generic_file_aio_read(). With copy_page_to_iter() doing all work of mapping and copying a page to iovec and advancing iov_iter. Switch shmem_file_aio_read() to the same and kill file_read_actor(), while we are at it. Signed-off-by: Al Viro --- include/linux/fs.h | 1 - include/linux/uio.h | 2 + mm/filemap.c | 202 ++++++++++++++++++++++++++++------------------------ mm/shmem.c | 77 +++++++------------- 4 files changed, 138 insertions(+), 144 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2261ac8f0534..2a5b1744f80a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2390,7 +2390,6 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); -extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, diff --git a/include/linux/uio.h b/include/linux/uio.h index 347d70ce098e..199bcc34241b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -67,6 +67,8 @@ size_t iov_iter_copy_from_user(struct page *page, void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i); static inline void iov_iter_init(struct iov_iter *i, const struct iovec *iov, unsigned long nr_segs, diff --git a/mm/filemap.c b/mm/filemap.c index bfb7a97d6d0f..a16eb2c4f316 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1085,11 +1085,90 @@ static void shrink_readahead_size_eio(struct file *filp, ra->ra_pages /= 4; } +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_writeable(buf, copy)) { + kaddr = kmap_atomic(page); + from = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = from - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + from = kaddr + offset; + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + kunmap(page); +done: + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} +EXPORT_SYMBOL(copy_page_to_iter); + /** * do_generic_file_read - generic file read routine * @filp: the file to read * @ppos: current file position - * @desc: read_descriptor + * @iter: data destination + * @written: already copied * * This is a generic file read routine, and uses the * mapping->a_ops->readpage() function for the actual low-level stuff. @@ -1097,8 +1176,8 @@ static void shrink_readahead_size_eio(struct file *filp, * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. */ -static void do_generic_file_read(struct file *filp, loff_t *ppos, - read_descriptor_t *desc) +static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, + struct iov_iter *iter, ssize_t written) { struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; @@ -1108,12 +1187,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos, pgoff_t prev_index; unsigned long offset; /* offset into pagecache page */ unsigned int prev_offset; - int error; + int error = 0; index = *ppos >> PAGE_CACHE_SHIFT; prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); - last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; + last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; for (;;) { @@ -1148,7 +1227,7 @@ find_page: if (!page->mapping) goto page_not_up_to_date_locked; if (!mapping->a_ops->is_partially_uptodate(page, - offset, desc->count)) + offset, iter->count)) goto page_not_up_to_date_locked; unlock_page(page); } @@ -1198,24 +1277,23 @@ page_ok: /* * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... - * - * The file_read_actor routine returns how many bytes were - * actually used.. - * NOTE! This may not be the same as how much of a user buffer - * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer - * pointers and the remaining count). */ - ret = file_read_actor(desc, page, offset, nr); + + ret = copy_page_to_iter(page, offset, nr, iter); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; prev_offset = offset; page_cache_release(page); - if (ret == nr && desc->count) - continue; - goto out; + written += ret; + if (!iov_iter_count(iter)) + goto out; + if (ret < nr) { + error = -EFAULT; + goto out; + } + continue; page_not_up_to_date: /* Get exclusive access to the page ... */ @@ -1250,6 +1328,7 @@ readpage: if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { page_cache_release(page); + error = 0; goto find_page; } goto readpage_error; @@ -1280,7 +1359,6 @@ readpage: readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ - desc->error = error; page_cache_release(page); goto out; @@ -1291,16 +1369,17 @@ no_cached_page: */ page = page_cache_alloc_cold(mapping); if (!page) { - desc->error = -ENOMEM; + error = -ENOMEM; goto out; } error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); if (error) { page_cache_release(page); - if (error == -EEXIST) + if (error == -EEXIST) { + error = 0; goto find_page; - desc->error = error; + } goto out; } goto readpage; @@ -1313,44 +1392,7 @@ out: *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; file_accessed(filp); -} - -int file_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - - if (size > count) - size = count; - - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. - */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; - } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; + return written ? written : error; } /* @@ -1408,14 +1450,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, { struct file *filp = iocb->ki_filp; ssize_t retval; - unsigned long seg = 0; size_t count; loff_t *ppos = &iocb->ki_pos; + struct iov_iter i; count = 0; retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (retval) return retval; + iov_iter_init(&i, iov, nr_segs, count, 0); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { @@ -1437,6 +1480,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (retval > 0) { *ppos = pos + retval; count -= retval; + /* + * If we did a short DIO read we need to skip the + * section of the iov that we've already read data into. + */ + iov_iter_advance(&i, retval); } /* @@ -1453,39 +1501,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, } } - count = retval; - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - loff_t offset = 0; - - /* - * If we did a short DIO read we need to skip the section of the - * iov that we've already read data into. - */ - if (count) { - if (count > iov[seg].iov_len) { - count -= iov[seg].iov_len; - continue; - } - offset = count; - count = 0; - } - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base + offset; - desc.count = iov[seg].iov_len - offset; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - if (desc.count > 0) - break; - } + retval = do_generic_file_read(filp, ppos, &i, retval); out: return retval; } diff --git a/mm/shmem.c b/mm/shmem.c index 9398e6cd48cb..17d3799d04bd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1462,13 +1462,25 @@ shmem_write_end(struct file *file, struct address_space *mapping, return copied; } -static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc) +static ssize_t shmem_file_aio_read(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = file_inode(filp); + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; pgoff_t index; unsigned long offset; enum sgp_type sgp = SGP_READ; + int error; + ssize_t retval; + size_t count; + loff_t *ppos = &iocb->ki_pos; + struct iov_iter iter; + + retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); + if (retval) + return retval; + iov_iter_init(&iter, iov, nr_segs, count, 0); /* * Might this read be for a stacking filesystem? Then when reading @@ -1496,10 +1508,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ break; } - desc->error = shmem_getpage(inode, index, &page, sgp, NULL); - if (desc->error) { - if (desc->error == -EINVAL) - desc->error = 0; + error = shmem_getpage(inode, index, &page, sgp, NULL); + if (error) { + if (error == -EINVAL) + error = 0; break; } if (page) @@ -1543,61 +1555,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ /* * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... - * - * The actor routine returns how many bytes were actually used.. - * NOTE! This may not be the same as how much of a user buffer - * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer - * pointers and the remaining count). */ - ret = file_read_actor(desc, page, offset, nr); + ret = copy_page_to_iter(page, offset, nr, &iter); + retval += ret; offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; page_cache_release(page); - if (ret != nr || !desc->count) + if (!iov_iter_count(&iter)) break; - + if (ret < nr) { + error = -EFAULT; + break; + } cond_resched(); } *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; - file_accessed(filp); -} - -static ssize_t shmem_file_aio_read(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; - - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_shmem_file_read(filp, ppos, &desc); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - if (desc.count > 0) - break; - } - return retval; + file_accessed(file); + return retval ? retval : error; } static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, -- cgit v1.2.3 From 86d564c84c38b1ec06d9f2120d6a7373dcaeff0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Feb 2014 20:42:52 -0500 Subject: constify blk_rq_map_user_iov() and friends sg_iovec array passed to it can be const Signed-off-by: Al Viro --- block/blk-map.c | 2 +- fs/bio.c | 10 +++++----- include/linux/bio.h | 5 +++-- include/linux/blkdev.h | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/blk-map.c b/block/blk-map.c index ae4ae1047fd9..86d93779c066 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct rq_map_data *map_data, struct sg_iovec *iov, + struct rq_map_data *map_data, const struct sg_iovec *iov, int iov_count, unsigned int len, gfp_t gfp_mask) { struct bio *bio; diff --git a/fs/bio.c b/fs/bio.c index 8754e7b6eb49..7065837ae9f3 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1003,7 +1003,7 @@ struct bio_map_data { }; static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int is_our_pages) { memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); @@ -1023,7 +1023,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, sizeof(struct sg_iovec) * iov_count, gfp_mask); } -static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, +static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, int to_user, int from_user, int do_free_page) { int ret = 0, i; @@ -1121,7 +1121,7 @@ EXPORT_SYMBOL(bio_uncopy_user); */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; @@ -1260,7 +1260,7 @@ EXPORT_SYMBOL(bio_copy_user); static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { int i, j; @@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(bio_map_user); * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio *bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a4d39b4686b..21e27208316c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -388,7 +388,7 @@ struct sg_iovec; struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int, gfp_t); + const struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -414,7 +414,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, struct sg_iovec *, + struct rq_map_data *, + const struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..a639fd8a6d7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -823,8 +823,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *, extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, struct sg_iovec *, int, - unsigned int, gfp_t); + struct rq_map_data *, const struct sg_iovec *, + int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3 From 41fc56d573c35a212688b12b48af8c303f9bb6d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 12:58:52 -0500 Subject: kill the 4th argument of __generic_file_aio_write() It's always equal to &iocb->ki_pos, where iocb is the value of the 1st argument. Signed-off-by: Al Viro --- fs/block_dev.c | 2 +- fs/ext4/file.c | 2 +- fs/udf/file.c | 2 +- include/linux/fs.h | 3 +-- mm/filemap.c | 13 ++++++------- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e86823a9cbd..764bd3b8d2fa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); blk_start_plug(&plug); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); if (ret > 0) { ssize_t err; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1a5073959f32..d564bcfb23c5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, overwrite = 1; } - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (ret > 0) { diff --git a/fs/udf/file.c b/fs/udf/file.c index 1037637957c7..d2c170f8b035 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + retval = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (retval > 0) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a5b1744f80a..e677d1e1189f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,8 +2392,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, - loff_t *); +extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); diff --git a/mm/filemap.c b/mm/filemap.c index c4730efa5d9e..ce2246dd90de 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2222,14 +2222,14 @@ EXPORT_SYMBOL(generic_file_buffered_write); * avoid syncing under i_mutex. */ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; - loff_t pos; + loff_t pos = iocb->ki_pos; ssize_t written; ssize_t err; @@ -2239,7 +2239,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return err; count = ocount; - pos = *ppos; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2266,7 +2265,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t written_buffered; written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - ppos, count, ocount); + &iocb->ki_pos, count, ocount); if (written < 0 || written == count) goto out; /* @@ -2276,7 +2275,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, ppos, count, + nr_segs, pos, &iocb->ki_pos, count, written); /* * If generic_file_buffered_write() retuned a synchronous error @@ -2310,7 +2309,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } } else { written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, ppos, count, written); + pos, &iocb->ki_pos, count, written); } out: current->backing_dev_info = NULL; @@ -2339,7 +2338,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (ret > 0) { -- cgit v1.2.3 From fcacafd269adc88f41b68cb77a3f957a66563428 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 13:37:49 -0500 Subject: kill the 5th argument of generic_file_buffered_write() same story - it's &iocb->ki_pos in all cases Signed-off-by: Al Viro --- fs/ceph/file.c | 3 +-- fs/ocfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 9 ++++----- 5 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 09c7afe32e49..a798db5e5e39 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -978,8 +978,7 @@ retry_snap: * can not run at the same time */ written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, &iocb->ki_pos, - count, 0); + pos, count, 0); mutex_unlock(&inode->i_mutex); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 51632c40e896..89099cce14fe 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2383,7 +2383,7 @@ relock: } else { current->backing_dev_info = file->f_mapping->backing_dev_info; written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, - ppos, count, 0); + count, 0); current->backing_dev_info = NULL; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 64b48eade91d..175ce58fbfa3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -738,7 +738,7 @@ xfs_file_buffered_aio_write( write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, 0); + pos, count, 0); /* * If we just got an ENOSPC, try to write back all dirty inodes to diff --git a/include/linux/fs.h b/include/linux/fs.h index e677d1e1189f..830e37420f5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2397,7 +2397,7 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, loff_t *, size_t, ssize_t); + unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, diff --git a/mm/filemap.c b/mm/filemap.c index ce2246dd90de..9d515a1a2372 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2183,7 +2183,7 @@ again: ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, loff_t *ppos, + unsigned long nr_segs, loff_t pos, size_t count, ssize_t written) { struct file *file = iocb->ki_filp; @@ -2195,7 +2195,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, if (likely(status >= 0)) { written += status; - *ppos = pos + status; + iocb->ki_pos = pos + status; } return written ? written : status; @@ -2275,8 +2275,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, &iocb->ki_pos, count, - written); + nr_segs, pos, count, written); /* * If generic_file_buffered_write() retuned a synchronous error * then we want to return the number of bytes which were @@ -2309,7 +2308,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } } else { written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, &iocb->ki_pos, count, written); + pos, count, written); } out: current->backing_dev_info = NULL; -- cgit v1.2.3 From 5cb6c6c7eb1ed24744b41fad47d9a25b72207098 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 20:58:20 -0500 Subject: generic_file_direct_write(): get rid of ppos argument always equal to &iocb->ki_pos. Signed-off-by: Al Viro --- fs/btrfs/file.c | 2 +- fs/fuse/file.c | 3 +-- fs/ocfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 6 +++--- 6 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6032a2bfab9..8ed4b165abbd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1640,7 +1640,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t endbyte; int err; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, &iocb->ki_pos, + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, count, ocount); if (written < 0 || written == count) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a91d3b4d32f3..fd06d1ebc2eb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1143,8 +1143,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, iov, &nr_segs, - pos, &iocb->ki_pos, + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, count, ocount); if (written < 0 || written == count) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 89099cce14fe..77b8a742866f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2375,7 +2375,7 @@ relock: if (direct_io) { written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, - ppos, count, ocount); + count, ocount); if (written < 0) { ret = written; goto out_dio; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 175ce58fbfa3..e593554ce65e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -699,7 +699,7 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); + &nr_segs, pos, count, ocount); out: xfs_rw_iunlock(ip, iolock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 830e37420f5e..9dfd7c7ff8e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2395,7 +2395,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, - unsigned long *, loff_t, loff_t *, size_t, size_t); + unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index 9d515a1a2372..93e9cf576452 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1985,7 +1985,7 @@ EXPORT_SYMBOL(pagecache_write_end); ssize_t generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long *nr_segs, loff_t pos, loff_t *ppos, + unsigned long *nr_segs, loff_t pos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; @@ -2046,7 +2046,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, i_size_write(inode, pos); mark_inode_dirty(inode); } - *ppos = pos; + iocb->ki_pos = pos; } out: return written; @@ -2265,7 +2265,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t written_buffered; written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - &iocb->ki_pos, count, ocount); + count, ocount); if (written < 0 || written == count) goto out; /* -- cgit v1.2.3 From 3b93f911d55cf8b3540f82fac4eeddfee7b5de0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 21:34:08 -0500 Subject: export generic_perform_write(), start getting rid of generic_file_buffer_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ mm/filemap.c | 37 +++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dfd7c7ff8e3..87be51aff9d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -48,6 +48,7 @@ struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; +struct iov_iter; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2396,6 +2397,7 @@ extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, un extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); +extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index 93e9cf576452..09bfc9b3bb51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2092,7 +2092,7 @@ found: } EXPORT_SYMBOL(grab_cache_page_write_begin); -static ssize_t generic_perform_write(struct file *file, +ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { struct address_space *mapping = file->f_mapping; @@ -2180,6 +2180,7 @@ again: return written ? written : status; } +EXPORT_SYMBOL(generic_perform_write); ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, @@ -2230,8 +2231,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; - ssize_t written; + ssize_t written = 0; ssize_t err; + ssize_t status; + struct iov_iter from; ocount = 0; err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); @@ -2242,8 +2245,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; - written = 0; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; @@ -2259,44 +2260,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; + iov_iter_init(&from, iov, nr_segs, count, 0); + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - ssize_t written_buffered; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, + written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, count, ocount); if (written < 0 || written == count) goto out; + iov_iter_advance(&from, written); + /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ pos += written; count -= written; - written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, count, written); + + status = generic_perform_write(file, &from, pos); /* - * If generic_file_buffered_write() retuned a synchronous error + * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were * direct-written, or the error code if that was zero. Note * that this differs from normal direct-io semantics, which * will return -EFOO even if some bytes were written. */ - if (written_buffered < 0) { - err = written_buffered; + if (unlikely(status < 0) && !written) { + err = status; goto out; } - + iocb->ki_pos = pos + status; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */ - endbyte = pos + written_buffered - written - 1; + endbyte = pos + status - 1; err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); if (err == 0) { - written = written_buffered; + written += status; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, endbyte >> PAGE_CACHE_SHIFT); @@ -2307,8 +2311,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, */ } } else { - written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, count, written); + written = generic_perform_write(file, &from, pos); + if (likely(written >= 0)) + iocb->ki_pos = pos + written; } out: current->backing_dev_info = NULL; -- cgit v1.2.3 From ccad2365668f12336dd497d9039e8584af836070 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 22:36:48 -0500 Subject: kill generic_file_buffered_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- mm/filemap.c | 23 +---------------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 87be51aff9d7..c309b7a0da2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2398,8 +2398,6 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, diff --git a/mm/filemap.c b/mm/filemap.c index 09bfc9b3bb51..1a455142784d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -76,7 +76,7 @@ * ->mmap_sem * ->lock_page (access_process_vm) * - * ->i_mutex (generic_file_buffered_write) + * ->i_mutex (generic_perform_write) * ->mmap_sem (fault_in_pages_readable->do_page_fault) * * bdi->wb.list_lock @@ -2182,27 +2182,6 @@ again: } EXPORT_SYMBOL(generic_perform_write); -ssize_t -generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, - size_t count, ssize_t written) -{ - struct file *file = iocb->ki_filp; - ssize_t status; - struct iov_iter i; - - iov_iter_init(&i, iov, nr_segs, count, written); - status = generic_perform_write(file, &i, pos); - - if (likely(status >= 0)) { - written += status; - iocb->ki_pos = pos + status; - } - - return written ? written : status; -} -EXPORT_SYMBOL(generic_file_buffered_write); - /** * __generic_file_aio_write - write data to a file * @iocb: IO state structure (file, offset, etc.) -- cgit v1.2.3