From 5fecf3a1e1a0af61eb34eb6976ec9f59cca65d3f Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 27 Oct 2014 18:51:43 +0000 Subject: staging: android: logger: Fix log corruption regression Since commit cd678fce4280 ("switch logger to ->write_iter()"), any attempt to write to the log results in the log data being written over its own metadata, thus rendering the log unreadable. The problem was first detected when I ran an Android userspace on the v3.18-rc1 kernel. However the issue can also be observed with a non-Android userspace by using echo/cat to write to/from /dev/log_main . This patch resolves the problem by using a temporary to track the status of not-yet-committed writes to the log buffer. Signed-off-by: Daniel Thompson Cc: Al Viro Signed-off-by: Al Viro --- drivers/staging/android/logger.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 28b93d39a94e..a673ffa34aa3 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -420,7 +420,7 @@ static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from) struct logger_log *log = file_get_log(iocb->ki_filp); struct logger_entry header; struct timespec now; - size_t len, count; + size_t len, count, w_off; count = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); @@ -452,11 +452,14 @@ static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from) memcpy(log->buffer + log->w_off, &header, len); memcpy(log->buffer, (char *)&header + len, sizeof(header) - len); - len = min(count, log->size - log->w_off); + /* Work with a copy until we are ready to commit the whole entry */ + w_off = logger_offset(log, log->w_off + sizeof(struct logger_entry)); - if (copy_from_iter(log->buffer + log->w_off, len, from) != len) { + len = min(count, log->size - w_off); + + if (copy_from_iter(log->buffer + w_off, len, from) != len) { /* - * Note that by not updating w_off, this abandons the + * Note that by not updating log->w_off, this abandons the * portion of the new entry that *was* successfully * copied, just above. This is intentional to avoid * message corruption from missing fragments. @@ -470,7 +473,7 @@ static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from) return -EFAULT; } - log->w_off = logger_offset(log, log->w_off + count); + log->w_off = logger_offset(log, w_off + count); mutex_unlock(&log->mutex); /* wake up any blocked readers */ -- cgit v1.2.3 From 54ef6df3f3f1353d99c80c437259d317b2cd1cbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 21:11:27 -0700 Subject: rcu: Provide counterpart to rcu_dereference() for non-RCU situations Although rcu_dereference() and friends can be used in situations where object lifetimes are being managed by something other than RCU, the resulting sparse and lockdep-RCU noise can be annoying. This commit therefore supplies a lockless_dereference(), which provides the protection for dereferences without the RCU-related debugging noise. Reported-by: Al Viro Signed-off-by: Paul E. McKenney Signed-off-by: Al Viro --- include/linux/rcupdate.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a4a819ffb2d1..53ff1a752d7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -616,6 +616,21 @@ static inline void rcu_preempt_sleep_check(void) */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to -- cgit v1.2.3 From d45f00ae43e63eff1b3d79df20610ae1ef645ebd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Oct 2014 18:27:28 -0400 Subject: overlayfs: barriers for opening upper-layer directory make sure that a) all stores done by opening struct file don't leak past storing the reference in od->upperfile b) the lockless side has read dependency barrier Signed-off-by: Al Viro --- fs/overlayfs/readdir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 910553f37aca..8c8ce9d87ba3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -454,12 +454,13 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { struct inode *inode = file_inode(file); - realfile = od->upperfile; + realfile =lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; ovl_path_upper(dentry, &upperpath); realfile = ovl_path_open(&upperpath, O_RDONLY); + smp_mb__before_spinlock(); mutex_lock(&inode->i_mutex); if (!od->upperfile) { if (IS_ERR(realfile)) { -- cgit v1.2.3 From c2096537d40f026672c4c6adfcd7247ce5799604 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 Oct 2014 13:48:48 +0100 Subject: ovl: fix check for cursor ovl_cache_entry.name is now an array not a pointer, so it makes no sense test for it being NULL. Detected by coverity. From: Miklos Szeredi Fixes: 68bf8611076a ("overlayfs: make ovl_cache_entry->name an array instead of +pointer") Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/overlayfs/readdir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 8c8ce9d87ba3..3fbf0d306e12 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -21,9 +21,10 @@ struct ovl_cache_entry { unsigned int len; unsigned int type; u64 ino; - bool is_whiteout; struct list_head l_node; struct rb_node node; + bool is_whiteout; + bool is_cursor; char name[]; }; @@ -251,7 +252,7 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(p, rdd->list, l_node) { - if (!p->name) + if (p->is_cursor) continue; if (p->type != DT_CHR) @@ -307,7 +308,6 @@ static inline int ovl_dir_read_merged(struct path *upperpath, } out: return err; - } static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) @@ -316,7 +316,7 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) loff_t off = 0; list_for_each_entry(p, &od->cache->entries, l_node) { - if (!p->name) + if (p->is_cursor) continue; if (off >= pos) break; @@ -389,7 +389,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); /* Skip cursors */ - if (p->name) { + if (!p->is_cursor) { if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; @@ -519,6 +519,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) od->realfile = realfile; od->is_real = (type != OVL_PATH_MERGE); od->is_upper = (type != OVL_PATH_LOWER); + od->cursor.is_cursor = true; file->private_data = od; return 0; -- cgit v1.2.3 From d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 Oct 2014 15:42:01 +0100 Subject: overlayfs: fix lockdep misannotation In an overlay directory that shadows an empty lower directory, say /mnt/a/empty102, do: touch /mnt/a/empty102/x unlink /mnt/a/empty102/x rmdir /mnt/a/empty102 It's actually harmless, but needs another level of nesting between I_MUTEX_CHILD and I_MUTEX_NORMAL. Signed-off-by: Miklos Szeredi Tested-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 2 +- fs/overlayfs/readdir.c | 2 +- include/linux/fs.h | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 42df664e95e5..922f27068c4c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2497,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) } mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); return NULL; } EXPORT_SYMBOL(lock_rename); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 3fbf0d306e12..401f0840f5cc 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -571,7 +571,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) { struct ovl_cache_entry *p; - mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); list_for_each_entry(p, list, l_node) { struct dentry *dentry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e41a4a331bb..01036262095f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,11 +639,13 @@ static inline int inode_unhashed(struct inode *inode) * 2: child/target * 3: xattr * 4: second non-directory - * The last is for certain operations (such as rename) which lock two + * 5: second parent (when locking independent directories in rename) + * + * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> second non-directory + * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -651,7 +653,8 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_NONDIR2 + I_MUTEX_NONDIR2, + I_MUTEX_PARENT2, }; void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From f643ff550afbc82a2bc7026f4a6d64427e4fbc99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Oct 2014 18:37:40 -0400 Subject: isofs_cmp(): we'll never see a dentry for . or .. Signed-off-by: Al Viro --- fs/isofs/namei.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 95295640d9c8..6f6dd0c6429f 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -18,23 +18,6 @@ static int isofs_cmp(struct dentry *dentry, const char *compare, int dlen) { struct qstr qstr; - - if (!compare) - return 1; - - /* check special "." and ".." files */ - if (dlen == 1) { - /* "." */ - if (compare[0] == 0) { - if (!dentry->d_name.len) - return 0; - compare = "."; - } else if (compare[0] == 1) { - compare = ".."; - dlen = 2; - } - } - qstr.name = compare; qstr.len = dlen; return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); @@ -146,7 +129,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, (!(de->flags[-sbi->s_high_sierra] & 1))) && (sbi->s_showassoc || (!(de->flags[-sbi->s_high_sierra] & 4)))) { - match = (isofs_cmp(dentry, dpnt, dlen) == 0); + if (dpnt && (dlen > 1 || dpnt[0] > 1)) + match = (isofs_cmp(dentry, dpnt, dlen) == 0); } if (match) { isofs_normalize_block_and_offset(de, -- cgit v1.2.3 From b0afd8e5db7b11aa9078e82e7f9abc30dc35a3c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Oct 2014 18:40:11 -0400 Subject: isofs: don't bother with ->d_op for normal case we only need it for joliet and case-insensitive mounts Signed-off-by: Al Viro --- fs/isofs/inode.c | 24 ++---------------------- fs/isofs/namei.c | 2 ++ 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 881b3bd0143f..fe839b915116 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -29,13 +29,9 @@ #define BEQUIET static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); -static int isofs_hash(const struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); -static int isofs_dentry_cmp(const struct dentry *parent, - const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); @@ -134,10 +130,6 @@ static const struct super_operations isofs_sops = { static const struct dentry_operations isofs_dentry_ops[] = { - { - .d_hash = isofs_hash, - .d_compare = isofs_dentry_cmp, - }, { .d_hash = isofs_hashi, .d_compare = isofs_dentry_cmpi, @@ -257,25 +249,12 @@ static int isofs_dentry_cmp_common( return 1; } -static int -isofs_hash(const struct dentry *dentry, struct qstr *qstr) -{ - return isofs_hash_common(qstr, 0); -} - static int isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { return isofs_hashi_common(qstr, 0); } -static int -isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name) -{ - return isofs_dentry_cmp_common(len, str, name, 0, 0); -} - static int isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) @@ -930,7 +909,8 @@ root_found: if (opt.check == 'r') table++; - s->s_d_op = &isofs_dentry_ops[table]; + if (table) + s->s_d_op = &isofs_dentry_ops[table - 1]; /* get the root dentry */ s->s_root = d_make_root(inode); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 6f6dd0c6429f..7b543e6b6526 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -20,6 +20,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) struct qstr qstr; qstr.name = compare; qstr.len = dlen; + if (likely(!dentry->d_op)) + return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); } -- cgit v1.2.3 From b2de525f095708b2adbadaec3f1e4017a23d1e09 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 29 Sep 2014 10:21:10 -0400 Subject: Return short read or 0 at end of a raw device, not EIO Author: David Jeffery Changes to the basic direct I/O code have broken the raw driver when reading to the end of a raw device. Instead of returning a short read for a read that extends partially beyond the device's end or 0 when at the end of the device, these reads now return EIO. The raw driver needs the same end of device handling as was added for normal block devices. Using blkdev_read_iter, which has the needed size checks, prevents the EIO conditions at the end of the device. Signed-off-by: David Jeffery Signed-off-by: Al Viro --- drivers/char/raw.c | 2 +- fs/block_dev.c | 3 ++- include/linux/fs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 0102dc788608..a24891b97547 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations raw_fops = { .read = new_sync_read, - .read_iter = generic_file_read_iter, + .read_iter = blkdev_read_iter, .write = new_sync_write, .write_iter = blkdev_write_iter, .fsync = blkdev_fsync, diff --git a/fs/block_dev.c b/fs/block_dev.c index cc9d4114cda0..1d9c9f3754f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1585,7 +1585,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL_GPL(blkdev_write_iter); -static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) +ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; @@ -1599,6 +1599,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) iov_iter_truncate(to, size); return generic_file_read_iter(iocb, to); } +EXPORT_SYMBOL_GPL(blkdev_read_iter); /* * Try to release a page associated with block device when the system diff --git a/include/linux/fs.h b/include/linux/fs.h index 01036262095f..9ab779e8a63c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2469,6 +2469,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ +extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From 9f2f7d4c8dfcf4617af5de6ea381b91deac3db48 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 31 Oct 2014 20:02:42 +0100 Subject: ovl: initialize ->is_cursor Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/overlayfs/readdir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 401f0840f5cc..4e9d7c1fea52 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -93,6 +93,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, p->type = d_type; p->ino = ino; p->is_whiteout = false; + p->is_cursor = false; } return p; -- cgit v1.2.3